diff --git a/assets/test.framegraph b/assets/test.framegraph index 4f981e8..01194f9 100644 --- a/assets/test.framegraph +++ b/assets/test.framegraph @@ -1,6 +1,6 @@ render_targets { color0 { - format R8G8B8A8_SRGB; + format B8G8R8A8_SRGB; width 1024; height 768; sample_count 4; diff --git a/src/app_framework/main_loop.c b/src/app_framework/main_loop.c index ece8cd7..d40d7f1 100644 --- a/src/app_framework/main_loop.c +++ b/src/app_framework/main_loop.c @@ -20,7 +20,7 @@ void UpdateThreadEntry(void *param) { rtWaitOnSemaphore(&g_main_loop.update_proceed); //rtLog("UT", "Processing %u", g_main_loop.u_frame_id); - (g_main_loop.GameUpdate)(); + (g_main_loop.GameUpdate)(g_main_loop.u_frame_id); //rtLog("UT", "Finished %u", g_main_loop.u_frame_id); g_main_loop.u_frame_id += 1; @@ -40,7 +40,7 @@ void RenderThreadEntry(void *param) { //rtLog("RT", "Processing %u", g_main_loop.r_frame_id); rtBeginGFXFrame(g_main_loop.r_frame_id); - (g_main_loop.GameRender)(); + (g_main_loop.GameRender)(g_main_loop.r_frame_id); rtEndGFXFrame(g_main_loop.r_frame_id); //rtLog("RT", "Finished %u", g_main_loop.r_frame_id); diff --git a/src/app_framework/main_loop.h b/src/app_framework/main_loop.h index 6f369b6..dfeab67 100644 --- a/src/app_framework/main_loop.h +++ b/src/app_framework/main_loop.h @@ -4,8 +4,8 @@ #include "runtime/runtime.h" #include "runtime/threading.h" -typedef void rt_main_loop_update_fn(void); -typedef void rt_main_loop_render_fn(void); +typedef void rt_main_loop_update_fn(unsigned int frame_id); +typedef void rt_main_loop_render_fn(unsigned int frame_id); typedef struct { unsigned int u_frame_id; diff --git a/src/game/entry.c b/src/game/entry.c index 7e09b8a..63ac9c6 100644 --- a/src/game/entry.c +++ b/src/game/entry.c @@ -3,8 +3,8 @@ extern void RegisterCVars(void); extern void Init(void); extern void Shutdown(void); -extern void Update(void); -extern void Render(void); +extern void Update(unsigned int); +extern void Render(unsigned int); static rt_app_callbacks _callbacks 
= { .RegisterCVars = RegisterCVars, diff --git a/src/game/main.c b/src/game/main.c index 7d883d9..445072c 100644 --- a/src/game/main.c +++ b/src/game/main.c @@ -67,9 +67,10 @@ void Shutdown(void) { rtDestroyFramegraph(_framegraph); } -void Update(void) { +void Update(unsigned int frame_id) { + RT_UNUSED(frame_id); } -void Render(void) { - rtExecuteFramegraph(_framegraph); +void Render(unsigned int frame_id) { + rtExecuteFramegraph(_framegraph, frame_id); } \ No newline at end of file diff --git a/src/gfx/gfx.h b/src/gfx/gfx.h index b8d09b3..54a2de6 100644 --- a/src/gfx/gfx.h +++ b/src/gfx/gfx.h @@ -178,7 +178,7 @@ RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph, rt_render_pass_id pass, const rt_render_pass_bind_fns *bind_fns); -RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph); +RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id); /* Utility to turn a string into a usable render target id. */ RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len); diff --git a/src/gfx/gfx_framegraph.c b/src/gfx/gfx_framegraph.c index 275e897..ce3d9e5 100644 --- a/src/gfx/gfx_framegraph.c +++ b/src/gfx/gfx_framegraph.c @@ -24,6 +24,7 @@ typedef struct { unsigned int width; unsigned int height; unsigned int sample_count; + rt_gpu_semaphore_handle semaphore; rt_render_target_handle api_render_target; } rt_render_target; @@ -32,6 +33,10 @@ typedef struct { rt_render_pass_type type; const char *name; int execution_level; + + bool reads_swapchain; + bool writes_swapchain; + unsigned int read_count; unsigned int write_count; rt_render_pass_bind_fns bound_fns; @@ -243,6 +248,30 @@ CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_aren graph->passes[i].type = pass_info[i].type; graph->passes[i].name = NULL; + graph->passes[i].reads_swapchain = false; + graph->passes[i].writes_swapchain = false; + const rt_render_target_info *rts = 
rtResolveConstRelptr(&info->render_targets); + for (unsigned int j = 0; j < graph->passes[i].read_count; ++j) { + rt_render_target_id rt = graph->passes[i].reads[j].render_target; + for (unsigned int k = 0; k < info->render_target_count; ++k) { + if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN && + rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN && + rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) { + graph->passes[i].reads_swapchain = true; + } + } + } + for (unsigned int j = 0; j < graph->passes[i].write_count; ++j) { + rt_render_target_id rt = graph->passes[i].writes[j].render_target; + for (unsigned int k = 0; k < info->render_target_count; ++k) { + if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN && + rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN && + rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) { + graph->passes[i].writes_swapchain = true; + } + } + } + const char *name = rtResolveConstRelptr(&pass_info[i].name); if (name) { size_t name_strlen = strlen(name); @@ -288,6 +317,12 @@ CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_are graph->render_targets[i].height = render_targets[i].height; graph->render_targets[i].sample_count = render_targets[i].sample_count; + rt_gpu_semaphore_info sem_info = { + .initial_value = 0, + .name = NULL, + }; + g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphore); + if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN || graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN || graph->render_targets[i].format != RT_PIXEL_FORMAT_SWAPCHAIN) { @@ -536,7 +571,6 @@ BeginGraphicsPass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffe g_renderer.CmdBeginPass(cmdbuf, &begin_info); } - static void BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) { const rt_render_target_write *writes = framegraph->passes[pass_idx].writes; @@ -574,15 +608,64 @@ 
BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer } } - -RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) { +RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id) { int execution_level = framegraph->passes[0].execution_level; uint32_t level_start = 0; + rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore(); + rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore(); + + rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); + if (!temp.arena) { + rtLog("GFX", "Unable to execute framegraph because no temporary arena is available."); + return; + } + + /* Find the last pass that writes to the swapchain. Index 0 is the fallback, so the scan stops before it. */ + uint32_t last_swapchain_write = 0; + for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) { + if (framegraph->passes[i].writes_swapchain) { + last_swapchain_write = i; + break; + } + } + /* Find the first pass that reads or writes the swapchain. Falls back to pass 0 if no pass touches it. */ + uint32_t first_swapchain_read = 0; + for (uint32_t i = 0; i < framegraph->pass_count; ++i) { + if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) { + first_swapchain_read = i; + break; + } + } + + /* Overflows after ~4.871x10^7 years */ + uint64_t signal_value_base = (uint64_t)frame_id * 100; + for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) { if ((i == framegraph->pass_count) || (framegraph->passes[i].execution_level > execution_level)) { - /* Dispatch all passes in the current execution level */ + + rt_temp_arena level_temp = rtBeginTempArena(temp.arena); + + rt_gpu_semaphore_handle *graphics_wait_semaphores = NULL; + rt_gpu_semaphore_handle *graphics_signal_semaphores = NULL; + uint64_t *graphics_wait_values = NULL; + uint64_t *graphics_signal_values = NULL; + rt_command_buffer_handle *graphics_command_buffers = NULL; + uint32_t graphics_command_buffer_count = 0; + uint32_t graphics_signal_semaphore_count = 0;
+ uint32_t graphics_wait_semaphore_count = 0; + + rt_gpu_semaphore_handle *compute_wait_semaphores = NULL; + rt_gpu_semaphore_handle *compute_signal_semaphores = NULL; + rt_command_buffer_handle *compute_command_buffers = NULL; + uint64_t *compute_wait_values = NULL; + uint64_t *compute_signal_values = NULL; + uint32_t compute_command_buffer_count = 0; + uint32_t compute_signal_semaphore_count = 0; + uint32_t compute_wait_semaphore_count = 0; + + /* Determine necessary array sizes */ for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) { bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL && framegraph->passes[pass_idx].bound_fns.Execute != NULL && @@ -594,14 +677,74 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) { framegraph->passes[pass_idx].id); continue; } + bool is_graphics_pass = + framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS; + /* The swapchain semaphores are counted with the same index tests the dispatch loop uses when it fills the arrays, so the allocated sizes always match the number of entries written. */ + if (is_graphics_pass) { + graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count; + graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count; + if (pass_idx == first_swapchain_read) + graphics_wait_semaphore_count += 1; + if (pass_idx == last_swapchain_write) + graphics_signal_semaphore_count += 1; + ++graphics_command_buffer_count; + } else { + compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count; + compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count; + if (pass_idx == first_swapchain_read) + compute_wait_semaphore_count += 1; + if (pass_idx == last_swapchain_write) + compute_signal_semaphore_count += 1; + ++compute_command_buffer_count; + } + } - /* - rt_render_pass_id id = framegraph->passes[pass_idx].id; + graphics_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_gpu_semaphore_handle, + graphics_wait_semaphore_count); + graphics_wait_values = + RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t,
graphics_wait_semaphore_count); + graphics_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_gpu_semaphore_handle, + graphics_signal_semaphore_count); + graphics_signal_values = + RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_signal_semaphore_count); + graphics_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_command_buffer_handle, + graphics_command_buffer_count); + graphics_signal_semaphore_count = 0; + graphics_wait_semaphore_count = 0; + graphics_command_buffer_count = 0; + + compute_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_gpu_semaphore_handle, + compute_wait_semaphore_count); + compute_wait_values = + RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_wait_semaphore_count); + compute_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_gpu_semaphore_handle, + compute_signal_semaphore_count); + compute_signal_values = + RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_signal_semaphore_count); + compute_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena, + rt_command_buffer_handle, + compute_command_buffer_count); + compute_signal_semaphore_count = 0; + compute_wait_semaphore_count = 0; + compute_command_buffer_count = 0; + + /* Dispatch all passes in the current execution level */ + for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) { + bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL && + framegraph->passes[pass_idx].bound_fns.Execute != NULL && + framegraph->passes[pass_idx].bound_fns.Finalize != NULL; + if (!pass_bound) + continue; + + // rt_render_pass_id id = framegraph->passes[pass_idx].id; const rt_render_target_write *writes = framegraph->passes[pass_idx].writes; const rt_render_target_read *reads = framegraph->passes[pass_idx].reads; uint32_t write_count = framegraph->passes[pass_idx].write_count; uint32_t read_count = framegraph->passes[pass_idx].read_count; - */ /* TODO(Kevin): Every one of these should be a job-dispatch*/ @@ -646,9 
+789,83 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) { g_renderer.CmdEndPass(cmdbuf); } - rt_submit_command_buffers_info submit = {.command_buffer_count = 1, - .command_buffers = &cmdbuf}; - g_renderer.SubmitCommandBuffers(is_graphics_pass ? RT_GRAPHICS_QUEUE : RT_COMPUTE_QUEUE, &submit); + rt_gpu_semaphore_handle *wait_semaphores = NULL, *signal_semaphores = NULL; + uint64_t *wait_values = NULL, *signal_values = NULL; + rt_command_buffer_handle *command_buffers = NULL; + uint32_t *wait_count = NULL, *signal_count = 0; + uint32_t *command_buffer_count = NULL; + if (is_graphics_pass) { + wait_semaphores = graphics_wait_semaphores; + signal_semaphores = graphics_signal_semaphores; + wait_values = graphics_wait_values; + signal_values = graphics_signal_values; + command_buffers = graphics_command_buffers; + wait_count = &graphics_wait_semaphore_count; + signal_count = &graphics_signal_semaphore_count; + command_buffer_count = &graphics_command_buffer_count; + } else { + wait_semaphores = compute_wait_semaphores; + signal_semaphores = compute_signal_semaphores; + wait_values = compute_wait_values; + signal_values = compute_signal_values; + command_buffers = compute_command_buffers; + wait_count = &compute_wait_semaphore_count; + signal_count = &compute_signal_semaphore_count; + command_buffer_count = &compute_command_buffer_count; + } + + for (uint32_t j = 0; j < read_count; ++j) { + rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target); + wait_semaphores[*wait_count] = rt->semaphore; + wait_values[*wait_count] = signal_value_base + execution_level; + *wait_count += 1; + } + for (uint32_t j = 0; j < write_count; ++j) { + rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target); + signal_semaphores[*signal_count] = rt->semaphore; + signal_values[*signal_count] = signal_value_base + execution_level + 1; + *signal_count += 1; + } + if (pass_idx == first_swapchain_read) { + wait_semaphores[*wait_count] = 
swapchain_available; + wait_values[*wait_count] = 0; + *wait_count += 1; + } + if (pass_idx == last_swapchain_write) { + signal_semaphores[*signal_count] = render_finished; + signal_values[*signal_count] = 0; + *signal_count += 1; + } + command_buffers[*command_buffer_count] = cmdbuf; + *command_buffer_count += 1; + } + + if (graphics_command_buffer_count > 0) { + rt_submit_command_buffers_info submit = { + .command_buffers = graphics_command_buffers, + .command_buffer_count = graphics_command_buffer_count, + .signal_semaphores = graphics_signal_semaphores, + .signal_values = graphics_signal_values, + .signal_semaphore_count = graphics_signal_semaphore_count, + .wait_semaphores = graphics_wait_semaphores, + .wait_values = graphics_wait_values, + .wait_semaphore_count = graphics_wait_semaphore_count, + }; + g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit); + } + + if (compute_command_buffer_count > 0) { + rt_submit_command_buffers_info submit = { + .command_buffers = compute_command_buffers, + .command_buffer_count = compute_command_buffer_count, + .signal_semaphores = compute_signal_semaphores, + .signal_values = compute_signal_values, + .signal_semaphore_count = compute_signal_semaphore_count, + .wait_semaphores = compute_wait_semaphores, + .wait_values = compute_wait_values, + .wait_semaphore_count = compute_wait_semaphore_count, + }; + g_renderer.SubmitCommandBuffers(RT_COMPUTE_QUEUE, &submit); } /* Start next level */ diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h index eadb4bc..3754b40 100644 --- a/src/gfx/renderer_api.h +++ b/src/gfx/renderer_api.h @@ -167,6 +167,9 @@ typedef struct { rt_pass_clear_value depth_stencil_buffer_clear_value; rt_rect2i render_area; + + // For debug purposes, can be NULL + const char *name; } rt_cmd_begin_pass_info; typedef enum { diff --git a/src/renderer/vk/command_buffers.c b/src/renderer/vk/command_buffers.c index 3189d9c..4dfeec1 100644 --- a/src/renderer/vk/command_buffers.c +++ 
b/src/renderer/vk/command_buffers.c @@ -283,7 +283,7 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue, .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, .deviceIndex = 0, }; - wait_semaphores[i] = semaphore_info; + signal_semaphores[i] = semaphore_info; } for (uint32_t i = 0; i < count; ++i) { diff --git a/src/renderer/vk/commands.c b/src/renderer/vk/commands.c index 09bdf64..4d0592f 100644 --- a/src/renderer/vk/commands.c +++ b/src/renderer/vk/commands.c @@ -29,7 +29,7 @@ void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdbuf_handle, VkDebugUtilsLabelEXT debug_label = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, .color = {0.39f, 0.58f, 0.92f, 1.f}, - .pLabelName = "RenderPass" + .pLabelName = (info->name) ? info->name : "Unnamed pass", }; vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label); #endif diff --git a/src/renderer/vk/frame.c b/src/renderer/vk/frame.c index adacaa9..9beec6b 100644 --- a/src/renderer/vk/frame.c +++ b/src/renderer/vk/frame.c @@ -108,10 +108,10 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) { vkEndCommandBuffer(cmd); if (rtSubmitSingleCommandBuffer(cmd, - &frame->image_available, + &frame->render_finished, NULL, 1, - &frame->render_finished, + &frame->swapchain_transitioned, NULL, 1, RT_GRAPHICS_QUEUE) != RT_SUCCESS) { @@ -124,7 +124,7 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) { .pImageIndices = &image_index, .pSwapchains = &g_swapchain.swapchain, .swapchainCount = 1, - .pWaitSemaphores = &frame->render_finished, + .pWaitSemaphores = &frame->swapchain_transitioned, .waitSemaphoreCount = 1, }; diff --git a/src/renderer/vk/gpu.h b/src/renderer/vk/gpu.h index 1d9caca..65ab239 100644 --- a/src/renderer/vk/gpu.h +++ b/src/renderer/vk/gpu.h @@ -37,6 +37,7 @@ typedef struct { uint32_t swapchain_image_index; VkSemaphore image_available; VkSemaphore render_finished; + VkSemaphore swapchain_transitioned; } rt_frame_data; typedef struct { diff --git a/src/renderer/vk/gpu_sync.c 
b/src/renderer/vk/gpu_sync.c index 63c309d..c99360d 100644 --- a/src/renderer/vk/gpu_sync.c +++ b/src/renderer/vk/gpu_sync.c @@ -15,8 +15,8 @@ RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 102 typedef struct rt_gpu_semaphore_s { uint32_t version; - VkSemaphore semaphore; - uint64_t current_value; + VkSemaphore semaphore[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT]; + uint64_t current_value[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT]; /* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a * not-signaled semaphore. */ @@ -28,8 +28,10 @@ static rt_gpu_semaphore *_first_free; static rt_mutex *_lock; static void DestroySemaphore(rt_gpu_semaphore *s) { - vkDestroySemaphore(g_gpu.device, s->semaphore, g_gpu.alloc_cb); - s->semaphore = VK_NULL_HANDLE; + for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) { + vkDestroySemaphore(g_gpu.device, s->semaphore[i], g_gpu.alloc_cb); + s->semaphore[i] = VK_NULL_HANDLE; + } rtLockMutex(_lock); s->next_free = _first_free; _first_free = s; @@ -58,7 +60,8 @@ rt_result InitializeSempahoreManagement(void) { void ShutdownSemaphoreManagement(void) { for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) { - vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore, g_gpu.alloc_cb); + for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) + vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore[j], g_gpu.alloc_cb); } } @@ -82,23 +85,28 @@ rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count, sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION; - VkSemaphoreTypeCreateInfo type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = info[i].initial_value, - }; - VkSemaphoreCreateInfo semaphore_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &type_info, - }; + for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) { + VkSemaphoreTypeCreateInfo type_info = 
{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = info[i].initial_value, + }; + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &type_info, + }; - if (vkCreateSemaphore(g_gpu.device, &semaphore_info, g_gpu.alloc_cb, &sem->semaphore) != - VK_SUCCESS) { - for (uint32_t j = 0; j < i; ++j) { - uint32_t index = p_semaphores[j].index; - DestroySemaphore(&_semaphores[index]); + if (vkCreateSemaphore(g_gpu.device, + &semaphore_info, + g_gpu.alloc_cb, + &sem->semaphore[j]) != VK_SUCCESS) { + for (uint32_t k = 0; k < i; ++k) { + uint32_t index = p_semaphores[k].index; + DestroySemaphore(&_semaphores[index]); + } + return RT_UNKNOWN_ERROR; } - return RT_UNKNOWN_ERROR; + sem->current_value[j] = 0; } p_semaphores[i].version = sem->version; @@ -139,7 +147,8 @@ VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) { return VK_NULL_HANDLE; if (_semaphores[index].version != handle.version) return VK_NULL_HANDLE; - return _semaphores[index].semaphore; + uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight; + return _semaphores[index].semaphore[frame]; } uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) { @@ -148,10 +157,11 @@ uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore return 0; if (_semaphores[index].version != semaphore.version) return 0; + uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight; vkGetSemaphoreCounterValue(g_gpu.device, - _semaphores[index].semaphore, - &_semaphores[index].current_value); - return _semaphores[index].current_value; + _semaphores[index].semaphore[frame], + &_semaphores[index].current_value[frame]); + return _semaphores[index].current_value[frame]; } rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) { diff --git a/src/renderer/vk/init.c b/src/renderer/vk/init.c index b427819..33b9c40 100644 
--- a/src/renderer/vk/init.c +++ b/src/renderer/vk/init.c @@ -328,8 +328,12 @@ static rt_result ChoosePhysicalDevice(void) { uint32_t highscore = 0; uint32_t best_index = phys_device_count; for (uint32_t i = 0; i < phys_device_count; ++i) { + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + }; VkPhysicalDeviceSynchronization2Features synchronization2_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, + .pNext = &timeline_semaphore_features, }; VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, @@ -364,7 +368,8 @@ static rt_result ChoosePhysicalDevice(void) { continue; if (!synchronization2_features.synchronization2 || - !dynamic_rendering_features.dynamicRendering) + !dynamic_rendering_features.dynamicRendering || + !timeline_semaphore_features.timelineSemaphore) continue; /* Check for bindless support */ @@ -483,8 +488,12 @@ static rt_result CreateDevice(void) { queue_info[distinct_queue_count].pQueuePriorities = &priority; ++distinct_queue_count; } + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + }; VkPhysicalDeviceSynchronization2Features synchronization2_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, + .pNext = &timeline_semaphore_features, }; VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, @@ -591,6 +600,12 @@ static rt_result CreatePerFrameObjects(void) { &g_gpu.frames[i].image_available) != VK_SUCCESS) { return RT_UNKNOWN_ERROR; } + if (vkCreateSemaphore(g_gpu.device, + &semaphore_info, + g_gpu.alloc_cb, + &g_gpu.frames[i].swapchain_transitioned) != VK_SUCCESS) { + return RT_UNKNOWN_ERROR; + } } return 
RT_SUCCESS; } @@ -599,6 +614,7 @@ void DestroyPerFrameObjects(void) { for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) { vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].image_available, g_gpu.alloc_cb); vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].render_finished, g_gpu.alloc_cb); + vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].swapchain_transitioned, g_gpu.alloc_cb); } }