Chain framegraph semaphores together
Quick RenderDoc check suggests that this works as intended :-)
This commit is contained in:
parent
ee24cd4903
commit
9008d08d9f
@ -1,6 +1,6 @@
|
||||
render_targets {
|
||||
color0 {
|
||||
format R8G8B8A8_SRGB;
|
||||
format B8G8R8A8_SRGB;
|
||||
width 1024;
|
||||
height 768;
|
||||
sample_count 4;
|
||||
|
@ -20,7 +20,7 @@ void UpdateThreadEntry(void *param) {
|
||||
rtWaitOnSemaphore(&g_main_loop.update_proceed);
|
||||
//rtLog("UT", "Processing %u", g_main_loop.u_frame_id);
|
||||
|
||||
(g_main_loop.GameUpdate)();
|
||||
(g_main_loop.GameUpdate)(g_main_loop.u_frame_id);
|
||||
|
||||
//rtLog("UT", "Finished %u", g_main_loop.u_frame_id);
|
||||
g_main_loop.u_frame_id += 1;
|
||||
@ -40,7 +40,7 @@ void RenderThreadEntry(void *param) {
|
||||
//rtLog("RT", "Processing %u", g_main_loop.r_frame_id);
|
||||
|
||||
rtBeginGFXFrame(g_main_loop.r_frame_id);
|
||||
(g_main_loop.GameRender)();
|
||||
(g_main_loop.GameRender)(g_main_loop.r_frame_id);
|
||||
rtEndGFXFrame(g_main_loop.r_frame_id);
|
||||
|
||||
//rtLog("RT", "Finished %u", g_main_loop.r_frame_id);
|
||||
|
@ -4,8 +4,8 @@
|
||||
#include "runtime/runtime.h"
|
||||
#include "runtime/threading.h"
|
||||
|
||||
typedef void rt_main_loop_update_fn(void);
|
||||
typedef void rt_main_loop_render_fn(void);
|
||||
typedef void rt_main_loop_update_fn(unsigned int frame_id);
|
||||
typedef void rt_main_loop_render_fn(unsigned int frame_id);
|
||||
|
||||
typedef struct {
|
||||
unsigned int u_frame_id;
|
||||
|
@ -3,8 +3,8 @@
|
||||
extern void RegisterCVars(void);
|
||||
extern void Init(void);
|
||||
extern void Shutdown(void);
|
||||
extern void Update(void);
|
||||
extern void Render(void);
|
||||
extern void Update(unsigned int);
|
||||
extern void Render(unsigned int);
|
||||
|
||||
static rt_app_callbacks _callbacks = {
|
||||
.RegisterCVars = RegisterCVars,
|
||||
|
@ -67,9 +67,10 @@ void Shutdown(void) {
|
||||
rtDestroyFramegraph(_framegraph);
|
||||
}
|
||||
|
||||
void Update(void) {
|
||||
void Update(unsigned int frame_id) {
|
||||
RT_UNUSED(frame_id);
|
||||
}
|
||||
|
||||
void Render(void) {
|
||||
rtExecuteFramegraph(_framegraph);
|
||||
void Render(unsigned int frame_id) {
|
||||
rtExecuteFramegraph(_framegraph, frame_id);
|
||||
}
|
@ -178,7 +178,7 @@ RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
|
||||
rt_render_pass_id pass,
|
||||
const rt_render_pass_bind_fns *bind_fns);
|
||||
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph);
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id);
|
||||
|
||||
/* Utility to turn a string into a usable render target id. */
|
||||
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len);
|
||||
|
@ -24,6 +24,7 @@ typedef struct {
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int sample_count;
|
||||
rt_gpu_semaphore_handle semaphore;
|
||||
rt_render_target_handle api_render_target;
|
||||
} rt_render_target;
|
||||
|
||||
@ -32,6 +33,10 @@ typedef struct {
|
||||
rt_render_pass_type type;
|
||||
const char *name;
|
||||
int execution_level;
|
||||
|
||||
bool reads_swapchain;
|
||||
bool writes_swapchain;
|
||||
|
||||
unsigned int read_count;
|
||||
unsigned int write_count;
|
||||
rt_render_pass_bind_fns bound_fns;
|
||||
@ -243,6 +248,30 @@ CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_aren
|
||||
graph->passes[i].type = pass_info[i].type;
|
||||
graph->passes[i].name = NULL;
|
||||
|
||||
graph->passes[i].reads_swapchain = false;
|
||||
graph->passes[i].writes_swapchain = false;
|
||||
const rt_render_target_info *rts = rtResolveConstRelptr(&info->render_targets);
|
||||
for (unsigned int j = 0; j < graph->passes[i].read_count; ++j) {
|
||||
rt_render_target_id rt = graph->passes[i].reads[j].render_target;
|
||||
for (unsigned int k = 0; k < info->render_target_count; ++k) {
|
||||
if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
graph->passes[i].reads_swapchain = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int j = 0; j < graph->passes[i].write_count; ++j) {
|
||||
rt_render_target_id rt = graph->passes[i].writes[j].render_target;
|
||||
for (unsigned int k = 0; k < info->render_target_count; ++k) {
|
||||
if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
graph->passes[i].writes_swapchain = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char *name = rtResolveConstRelptr(&pass_info[i].name);
|
||||
if (name) {
|
||||
size_t name_strlen = strlen(name);
|
||||
@ -288,6 +317,12 @@ CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_are
|
||||
graph->render_targets[i].height = render_targets[i].height;
|
||||
graph->render_targets[i].sample_count = render_targets[i].sample_count;
|
||||
|
||||
rt_gpu_semaphore_info sem_info = {
|
||||
.initial_value = 0,
|
||||
.name = NULL,
|
||||
};
|
||||
g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphore);
|
||||
|
||||
if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
graph->render_targets[i].format != RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
@ -536,7 +571,6 @@ BeginGraphicsPass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffe
|
||||
g_renderer.CmdBeginPass(cmdbuf, &begin_info);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) {
|
||||
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
|
||||
@ -574,15 +608,64 @@ BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id) {
|
||||
int execution_level = framegraph->passes[0].execution_level;
|
||||
uint32_t level_start = 0;
|
||||
|
||||
rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore();
|
||||
rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore();
|
||||
|
||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||
if (!temp.arena) {
|
||||
rtLog("GFX", "Unable to execute framegraph because no temporary arena is available.");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find the last pass that writes to the swapchain */
|
||||
uint32_t last_swapchain_write = 0;
|
||||
for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) {
|
||||
if (framegraph->passes[i].writes_swapchain) {
|
||||
last_swapchain_write = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Find the first pass that reads or writes the swapchain */
|
||||
uint32_t first_swapchain_read = 0;
|
||||
for (uint32_t i = 0; framegraph->pass_count; ++i) {
|
||||
if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) {
|
||||
first_swapchain_read = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
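/* The 64-bit product overflows after ~4.871x10^7 years. NOTE(review): frame_id is an unsigned int and would wrap much earlier if it is 32 bits wide - confirm. */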
|
||||
uint64_t signal_value_base = (uint64_t)frame_id * 100;
|
||||
|
||||
for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) {
|
||||
if ((i == framegraph->pass_count) ||
|
||||
(framegraph->passes[i].execution_level > execution_level)) {
|
||||
/* Dispatch all passes in the current execution level */
|
||||
|
||||
rt_temp_arena level_temp = rtBeginTempArena(temp.arena);
|
||||
|
||||
rt_gpu_semaphore_handle *graphics_wait_semaphores = NULL;
|
||||
rt_gpu_semaphore_handle *graphics_signal_semaphores = NULL;
|
||||
uint64_t *graphics_wait_values = NULL;
|
||||
uint64_t *graphics_signal_values = NULL;
|
||||
rt_command_buffer_handle *graphics_command_buffers = NULL;
|
||||
uint32_t graphics_command_buffer_count = 0;
|
||||
uint32_t graphics_signal_semaphore_count = 0;
|
||||
uint32_t graphics_wait_semaphore_count = 0;
|
||||
|
||||
rt_gpu_semaphore_handle *compute_wait_semaphores = NULL;
|
||||
rt_gpu_semaphore_handle *compute_signal_semaphores = NULL;
|
||||
rt_command_buffer_handle *compute_command_buffers = NULL;
|
||||
uint64_t *compute_wait_values = NULL;
|
||||
uint64_t *compute_signal_values = NULL;
|
||||
uint32_t compute_command_buffer_count = 0;
|
||||
uint32_t compute_signal_semaphore_count = 0;
|
||||
uint32_t compute_wait_semaphore_count = 0;
|
||||
|
||||
/* Determine necessary array sizes */
|
||||
for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
|
||||
bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
|
||||
@ -594,14 +677,74 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
|
||||
framegraph->passes[pass_idx].id);
|
||||
continue;
|
||||
}
|
||||
bool is_graphics_pass =
|
||||
framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS;
|
||||
if (is_graphics_pass) {
|
||||
graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
|
||||
graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
|
||||
if (framegraph->passes[pass_idx].reads_swapchain)
|
||||
graphics_wait_semaphore_count += 1;
|
||||
if (framegraph->passes[pass_idx].writes_swapchain)
|
||||
graphics_signal_semaphore_count += 1;
|
||||
++graphics_command_buffer_count;
|
||||
} else {
|
||||
compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
|
||||
compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
|
||||
if (framegraph->passes[pass_idx].reads_swapchain)
|
||||
compute_wait_semaphore_count += 1;
|
||||
if (framegraph->passes[pass_idx].writes_swapchain)
|
||||
compute_signal_semaphore_count += 1;
|
||||
++compute_command_buffer_count;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
rt_render_pass_id id = framegraph->passes[pass_idx].id;
|
||||
graphics_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
graphics_wait_semaphore_count);
|
||||
graphics_wait_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_wait_semaphore_count);
|
||||
graphics_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
graphics_signal_semaphore_count);
|
||||
graphics_signal_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_signal_semaphore_count);
|
||||
graphics_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_command_buffer_handle,
|
||||
graphics_command_buffer_count);
|
||||
graphics_signal_semaphore_count = 0;
|
||||
graphics_wait_semaphore_count = 0;
|
||||
graphics_command_buffer_count = 0;
|
||||
|
||||
compute_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
compute_wait_semaphore_count);
|
||||
compute_wait_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_wait_semaphore_count);
|
||||
compute_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
compute_signal_semaphore_count);
|
||||
compute_signal_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_signal_semaphore_count);
|
||||
compute_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_command_buffer_handle,
|
||||
compute_command_buffer_count);
|
||||
compute_signal_semaphore_count = 0;
|
||||
compute_wait_semaphore_count = 0;
|
||||
compute_command_buffer_count = 0;
|
||||
|
||||
/* Dispatch all passes in the current execution level */
|
||||
for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
|
||||
bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
|
||||
if (!pass_bound)
|
||||
continue;
|
||||
|
||||
// rt_render_pass_id id = framegraph->passes[pass_idx].id;
|
||||
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
|
||||
const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
|
||||
uint32_t write_count = framegraph->passes[pass_idx].write_count;
|
||||
uint32_t read_count = framegraph->passes[pass_idx].read_count;
|
||||
*/
|
||||
|
||||
/* TODO(Kevin): Every one of these should be a job dispatch. */
|
||||
|
||||
@ -646,9 +789,83 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
|
||||
g_renderer.CmdEndPass(cmdbuf);
|
||||
}
|
||||
|
||||
rt_submit_command_buffers_info submit = {.command_buffer_count = 1,
|
||||
.command_buffers = &cmdbuf};
|
||||
g_renderer.SubmitCommandBuffers(is_graphics_pass ? RT_GRAPHICS_QUEUE : RT_COMPUTE_QUEUE, &submit);
|
||||
rt_gpu_semaphore_handle *wait_semaphores = NULL, *signal_semaphores = NULL;
|
||||
uint64_t *wait_values = NULL, *signal_values = NULL;
|
||||
rt_command_buffer_handle *command_buffers = NULL;
|
||||
uint32_t *wait_count = NULL, *signal_count = 0;
|
||||
uint32_t *command_buffer_count = NULL;
|
||||
if (is_graphics_pass) {
|
||||
wait_semaphores = graphics_wait_semaphores;
|
||||
signal_semaphores = graphics_signal_semaphores;
|
||||
wait_values = graphics_wait_values;
|
||||
signal_values = graphics_signal_values;
|
||||
command_buffers = graphics_command_buffers;
|
||||
wait_count = &graphics_wait_semaphore_count;
|
||||
signal_count = &graphics_signal_semaphore_count;
|
||||
command_buffer_count = &graphics_command_buffer_count;
|
||||
} else {
|
||||
wait_semaphores = compute_wait_semaphores;
|
||||
signal_semaphores = compute_signal_semaphores;
|
||||
wait_values = compute_wait_values;
|
||||
signal_values = compute_signal_values;
|
||||
command_buffers = compute_command_buffers;
|
||||
wait_count = &compute_wait_semaphore_count;
|
||||
signal_count = &compute_signal_semaphore_count;
|
||||
command_buffer_count = &compute_command_buffer_count;
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < read_count; ++j) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target);
|
||||
wait_semaphores[*wait_count] = rt->semaphore;
|
||||
wait_values[*wait_count] = signal_value_base + execution_level;
|
||||
*wait_count += 1;
|
||||
}
|
||||
for (uint32_t j = 0; j < write_count; ++j) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target);
|
||||
signal_semaphores[*signal_count] = rt->semaphore;
|
||||
signal_values[*signal_count] = signal_value_base + execution_level + 1;
|
||||
*signal_count += 1;
|
||||
}
|
||||
if (pass_idx == first_swapchain_read) {
|
||||
wait_semaphores[*wait_count] = swapchain_available;
|
||||
wait_values[*wait_count] = 0;
|
||||
*wait_count += 1;
|
||||
}
|
||||
if (pass_idx == last_swapchain_write) {
|
||||
signal_semaphores[*signal_count] = render_finished;
|
||||
signal_values[*signal_count] = 0;
|
||||
*signal_count += 1;
|
||||
}
|
||||
command_buffers[*command_buffer_count] = cmdbuf;
|
||||
*command_buffer_count += 1;
|
||||
}
|
||||
|
||||
if (graphics_command_buffer_count > 0) {
|
||||
rt_submit_command_buffers_info submit = {
|
||||
.command_buffers = graphics_command_buffers,
|
||||
.command_buffer_count = graphics_command_buffer_count,
|
||||
.signal_semaphores = graphics_signal_semaphores,
|
||||
.signal_values = graphics_signal_values,
|
||||
.signal_semaphore_count = graphics_signal_semaphore_count,
|
||||
.wait_semaphores = graphics_wait_semaphores,
|
||||
.wait_values = graphics_wait_values,
|
||||
.wait_semaphore_count = graphics_wait_semaphore_count,
|
||||
};
|
||||
g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit);
|
||||
}
|
||||
|
||||
if (compute_command_buffer_count > 0) {
|
||||
rt_submit_command_buffers_info submit = {
|
||||
.command_buffers = compute_command_buffers,
|
||||
.command_buffer_count = compute_command_buffer_count,
|
||||
.signal_semaphores = compute_signal_semaphores,
|
||||
.signal_values = compute_signal_values,
|
||||
.signal_semaphore_count = compute_signal_semaphore_count,
|
||||
.wait_semaphores = compute_wait_semaphores,
|
||||
.wait_values = compute_wait_values,
|
||||
.wait_semaphore_count = compute_wait_semaphore_count,
|
||||
};
|
||||
g_renderer.SubmitCommandBuffers(RT_COMPUTE_QUEUE, &submit);
|
||||
}
|
||||
|
||||
/* Start next level */
|
||||
|
@ -167,6 +167,9 @@ typedef struct {
|
||||
rt_pass_clear_value depth_stencil_buffer_clear_value;
|
||||
|
||||
rt_rect2i render_area;
|
||||
|
||||
// For debug purposes, can be NULL
|
||||
const char *name;
|
||||
} rt_cmd_begin_pass_info;
|
||||
|
||||
typedef enum {
|
||||
|
@ -283,7 +283,7 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
.deviceIndex = 0,
|
||||
};
|
||||
wait_semaphores[i] = semaphore_info;
|
||||
signal_semaphores[i] = semaphore_info;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
|
@ -29,7 +29,7 @@ void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdbuf_handle,
|
||||
VkDebugUtilsLabelEXT debug_label = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
|
||||
.color = {0.39f, 0.58f, 0.92f, 1.f},
|
||||
.pLabelName = "RenderPass"
|
||||
.pLabelName = (info->name) ? info->name : "Unnamed pass",
|
||||
};
|
||||
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
|
||||
#endif
|
||||
|
@ -108,10 +108,10 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
|
||||
|
||||
vkEndCommandBuffer(cmd);
|
||||
if (rtSubmitSingleCommandBuffer(cmd,
|
||||
&frame->image_available,
|
||||
&frame->render_finished,
|
||||
NULL,
|
||||
1,
|
||||
&frame->render_finished,
|
||||
&frame->swapchain_transitioned,
|
||||
NULL,
|
||||
1,
|
||||
RT_GRAPHICS_QUEUE) != RT_SUCCESS) {
|
||||
@ -124,7 +124,7 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
|
||||
.pImageIndices = &image_index,
|
||||
.pSwapchains = &g_swapchain.swapchain,
|
||||
.swapchainCount = 1,
|
||||
.pWaitSemaphores = &frame->render_finished,
|
||||
.pWaitSemaphores = &frame->swapchain_transitioned,
|
||||
.waitSemaphoreCount = 1,
|
||||
};
|
||||
|
||||
|
@ -37,6 +37,7 @@ typedef struct {
|
||||
uint32_t swapchain_image_index;
|
||||
VkSemaphore image_available;
|
||||
VkSemaphore render_finished;
|
||||
VkSemaphore swapchain_transitioned;
|
||||
} rt_frame_data;
|
||||
|
||||
typedef struct {
|
||||
|
@ -15,8 +15,8 @@ RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 102
|
||||
|
||||
typedef struct rt_gpu_semaphore_s {
|
||||
uint32_t version;
|
||||
VkSemaphore semaphore;
|
||||
uint64_t current_value;
|
||||
VkSemaphore semaphore[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
|
||||
uint64_t current_value[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
|
||||
/* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a
|
||||
* not-signaled semaphore. */
|
||||
|
||||
@ -28,8 +28,10 @@ static rt_gpu_semaphore *_first_free;
|
||||
static rt_mutex *_lock;
|
||||
|
||||
static void DestroySemaphore(rt_gpu_semaphore *s) {
|
||||
vkDestroySemaphore(g_gpu.device, s->semaphore, g_gpu.alloc_cb);
|
||||
s->semaphore = VK_NULL_HANDLE;
|
||||
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
|
||||
vkDestroySemaphore(g_gpu.device, s->semaphore[i], g_gpu.alloc_cb);
|
||||
s->semaphore[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
rtLockMutex(_lock);
|
||||
s->next_free = _first_free;
|
||||
_first_free = s;
|
||||
@ -58,7 +60,8 @@ rt_result InitializeSempahoreManagement(void) {
|
||||
|
||||
void ShutdownSemaphoreManagement(void) {
|
||||
for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) {
|
||||
vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore, g_gpu.alloc_cb);
|
||||
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j)
|
||||
vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore[j], g_gpu.alloc_cb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,23 +85,28 @@ rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
|
||||
|
||||
sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
|
||||
|
||||
VkSemaphoreTypeCreateInfo type_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
|
||||
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
|
||||
.initialValue = info[i].initial_value,
|
||||
};
|
||||
VkSemaphoreCreateInfo semaphore_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
.pNext = &type_info,
|
||||
};
|
||||
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) {
|
||||
VkSemaphoreTypeCreateInfo type_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
|
||||
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
|
||||
.initialValue = info[i].initial_value,
|
||||
};
|
||||
VkSemaphoreCreateInfo semaphore_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
.pNext = &type_info,
|
||||
};
|
||||
|
||||
if (vkCreateSemaphore(g_gpu.device, &semaphore_info, g_gpu.alloc_cb, &sem->semaphore) !=
|
||||
VK_SUCCESS) {
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
uint32_t index = p_semaphores[j].index;
|
||||
DestroySemaphore(&_semaphores[index]);
|
||||
if (vkCreateSemaphore(g_gpu.device,
|
||||
&semaphore_info,
|
||||
g_gpu.alloc_cb,
|
||||
&sem->semaphore[j]) != VK_SUCCESS) {
|
||||
for (uint32_t k = 0; k < i; ++k) {
|
||||
uint32_t index = p_semaphores[k].index;
|
||||
DestroySemaphore(&_semaphores[index]);
|
||||
}
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
return RT_UNKNOWN_ERROR;
|
||||
sem->current_value[j] = 0;
|
||||
}
|
||||
|
||||
p_semaphores[i].version = sem->version;
|
||||
@ -139,7 +147,8 @@ VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) {
|
||||
return VK_NULL_HANDLE;
|
||||
if (_semaphores[index].version != handle.version)
|
||||
return VK_NULL_HANDLE;
|
||||
return _semaphores[index].semaphore;
|
||||
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
|
||||
return _semaphores[index].semaphore[frame];
|
||||
}
|
||||
|
||||
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) {
|
||||
@ -148,10 +157,11 @@ uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore
|
||||
return 0;
|
||||
if (_semaphores[index].version != semaphore.version)
|
||||
return 0;
|
||||
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
|
||||
vkGetSemaphoreCounterValue(g_gpu.device,
|
||||
_semaphores[index].semaphore,
|
||||
&_semaphores[index].current_value);
|
||||
return _semaphores[index].current_value;
|
||||
_semaphores[index].semaphore[frame],
|
||||
&_semaphores[index].current_value[frame]);
|
||||
return _semaphores[index].current_value[frame];
|
||||
}
|
||||
|
||||
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
|
||||
|
@ -328,8 +328,12 @@ static rt_result ChoosePhysicalDevice(void) {
|
||||
uint32_t highscore = 0;
|
||||
uint32_t best_index = phys_device_count;
|
||||
for (uint32_t i = 0; i < phys_device_count; ++i) {
|
||||
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
|
||||
};
|
||||
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
|
||||
.pNext = &timeline_semaphore_features,
|
||||
};
|
||||
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
|
||||
@ -364,7 +368,8 @@ static rt_result ChoosePhysicalDevice(void) {
|
||||
continue;
|
||||
|
||||
if (!synchronization2_features.synchronization2 ||
|
||||
!dynamic_rendering_features.dynamicRendering)
|
||||
!dynamic_rendering_features.dynamicRendering ||
|
||||
!timeline_semaphore_features.timelineSemaphore)
|
||||
continue;
|
||||
|
||||
/* Check for bindless support */
|
||||
@ -483,8 +488,12 @@ static rt_result CreateDevice(void) {
|
||||
queue_info[distinct_queue_count].pQueuePriorities = &priority;
|
||||
++distinct_queue_count;
|
||||
}
|
||||
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
|
||||
};
|
||||
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
|
||||
.pNext = &timeline_semaphore_features,
|
||||
};
|
||||
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
|
||||
@ -591,6 +600,12 @@ static rt_result CreatePerFrameObjects(void) {
|
||||
&g_gpu.frames[i].image_available) != VK_SUCCESS) {
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
if (vkCreateSemaphore(g_gpu.device,
|
||||
&semaphore_info,
|
||||
g_gpu.alloc_cb,
|
||||
&g_gpu.frames[i].swapchain_transitioned) != VK_SUCCESS) {
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
}
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
@ -599,6 +614,7 @@ void DestroyPerFrameObjects(void) {
|
||||
for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
|
||||
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].image_available, g_gpu.alloc_cb);
|
||||
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].render_finished, g_gpu.alloc_cb);
|
||||
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].swapchain_transitioned, g_gpu.alloc_cb);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user