From e989c2b406593e77c8a153c96b3480e11e29e466 Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Fri, 16 Feb 2024 15:31:23 +0100 Subject: [PATCH] Creating timed semaphores - Similar to Dx12 fences. - More flexible than VK 1 binary semaphores. --- src/renderer/vk/command_buffers.c | 71 +++++++++++---- src/renderer/vk/command_buffers.h | 1 + src/renderer/vk/gpu_sync.c | 139 ++++++++++++++++++++++++++++++ src/renderer/vk/gpu_sync.h | 10 +++ src/renderer/vk/init.c | 6 ++ src/renderer/vk/meson.build | 2 + src/runtime/gfx_main.c | 13 ++- src/runtime/renderer_api.h | 19 +++- 8 files changed, 240 insertions(+), 21 deletions(-) create mode 100644 src/renderer/vk/gpu_sync.c create mode 100644 src/renderer/vk/gpu_sync.h diff --git a/src/renderer/vk/command_buffers.c b/src/renderer/vk/command_buffers.c index fb77acb..bc628b2 100644 --- a/src/renderer/vk/command_buffers.c +++ b/src/renderer/vk/command_buffers.c @@ -1,4 +1,5 @@ #include "gpu.h" +#include "gpu_sync.h" #include "swapchain.h" #include "runtime/atomics.h" @@ -75,7 +76,7 @@ void rtResetCommandPools(unsigned int frame_id) { unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight; for (uint32_t i = 1; i < _next_pools; ++i) { if (vkResetCommandPool(g_gpu.device, - _pools[i].graphics_pools[pool_idx], + _pools[i].graphics_pools[pool_idx], VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) { rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx); } @@ -233,12 +234,48 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue, rt_result result = RT_SUCCESS; VkQueue target_queue = rtGetQueue(queue); - VkCommandBuffer *command_buffers = RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBuffer, count); + VkCommandBufferSubmitInfo *command_buffers = + RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count); if (!command_buffers) { result = RT_OUT_OF_MEMORY; goto out; } + VkSemaphoreSubmitInfo *wait_semaphores = + RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, 
info->wait_semaphore_count); + if (!wait_semaphores) { + result = RT_OUT_OF_MEMORY; + goto out; + } + VkSemaphoreSubmitInfo *signal_semaphores = + RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count); + if (!signal_semaphores) { + result = RT_OUT_OF_MEMORY; + goto out; + } + uint32_t wait_count = info->wait_semaphore_count; + uint32_t signal_count = info->signal_semaphore_count; + for (uint32_t i = 0; i < wait_count; ++i) { /* one submit info per semaphore this batch waits on */ + VkSemaphoreSubmitInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = rtGetSemaphore(info->wait_semaphores[i]), + .value = info->wait_values[i], + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .deviceIndex = 0, + }; + wait_semaphores[i] = semaphore_info; + } + for (uint32_t i = 0; i < signal_count; ++i) { /* one submit info per semaphore this batch signals */ + VkSemaphoreSubmitInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = rtGetSemaphore(info->signal_semaphores[i]), + .value = info->signal_values[i], + .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .deviceIndex = 0, + }; + signal_semaphores[i] = semaphore_info; + } + for (uint32_t i = 0; i < count; ++i) { uint32_t slot = info->command_buffers[i].index; if (_command_buffers[slot].version != info->command_buffers[i].version) { @@ -253,27 +290,23 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue, result = RT_INVALID_VALUE; goto out; } - command_buffers[i] = _command_buffers[slot].command_buffer; + command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO; + command_buffers[i].pNext = NULL; + command_buffers[i].deviceMask = 0; + command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer; } - /* TODO(Kevin): Retrieve semaphores */ - VkSemaphore *wait_semaphores = NULL; - VkSemaphore *signal_semaphores = NULL; - uint32_t wait_count = 0; - uint32_t signal_count = 0; - - VkSubmitInfo submit_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pCommandBuffers = command_buffers, - 
.commandBufferCount = count, - .pWaitSemaphores = wait_semaphores, - .pWaitDstStageMask = NULL, - .waitSemaphoreCount = wait_count, - .pSignalSemaphores = signal_semaphores, - .signalSemaphoreCount = signal_count, + VkSubmitInfo2 submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .waitSemaphoreInfoCount = wait_count, + .signalSemaphoreInfoCount = signal_count, + .pWaitSemaphoreInfos = wait_semaphores, + .pSignalSemaphoreInfos = signal_semaphores, + .commandBufferInfoCount = count, + .pCommandBufferInfos = command_buffers, }; - if (vkQueueSubmit(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) { + if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) { rtLog("vk", "vkQueueSubmit failed."); result = RT_UNKNOWN_ERROR; } diff --git a/src/renderer/vk/command_buffers.h b/src/renderer/vk/command_buffers.h index d8803ae..06646b7 100644 --- a/src/renderer/vk/command_buffers.h +++ b/src/renderer/vk/command_buffers.h @@ -5,4 +5,5 @@ void rtResetCommandPools(unsigned int frame_id); + #endif diff --git a/src/renderer/vk/gpu_sync.c b/src/renderer/vk/gpu_sync.c new file mode 100644 index 0000000..f606fc4 --- /dev/null +++ b/src/renderer/vk/gpu_sync.c @@ -0,0 +1,139 @@ +#include "gpu.h" + +#include "runtime/renderer_api.h" +#include "runtime/config.h" +#include "runtime/threading.h" +#include "runtime/handles.h" + +#include <stdlib.h> + +RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 1024); + +typedef struct rt_gpu_semaphore_s { + uint32_t version; + VkSemaphore semaphore; + uint64_t current_value; + /* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a not-signaled semaphore. 
*/ + + struct rt_gpu_semaphore_s *next_free; +} rt_gpu_semaphore; + +static rt_gpu_semaphore *_semaphores; +static rt_gpu_semaphore *_first_free; +static rt_mutex *_lock; + +static void DestroySemaphore(rt_gpu_semaphore *s) { + vkDestroySemaphore(g_gpu.device, s->semaphore, g_gpu.alloc_cb); + s->semaphore = VK_NULL_HANDLE; + rtLockMutex(_lock); + s->next_free = _first_free; + _first_free = s; + rtUnlockMutex(_lock); +} + +rt_result InitializeSempahoreManagement(void) { + _semaphores = calloc(rt_VkMaxSemaphores.i, sizeof(rt_gpu_semaphore)); + if (!_semaphores) + return RT_OUT_OF_MEMORY; + + _lock = rtCreateMutex(); + if (!_lock) { + free(_semaphores); + return RT_UNKNOWN_ERROR; + } + + /* Keep 0 unused for the invalid handle */ + _first_free = &_semaphores[1]; + for (int i = 1; i < rt_VkMaxSemaphores.i - 1; ++i) + _semaphores[i].next_free = &_semaphores[i + 1]; + _semaphores[rt_VkMaxSemaphores.i - 1].next_free = NULL; + + return RT_SUCCESS; +} + +void ShutdownSemaphoreManagement(void) { + for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) { + vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore, g_gpu.alloc_cb); + } +} + +rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count, const rt_gpu_semaphore_info *info, rt_gpu_semaphore_handle *p_semaphores) { + for (uint32_t i = 0; i < count; ++i) { + rtLockMutex(_lock); + rt_gpu_semaphore *sem = _first_free; + if (sem) + _first_free = sem->next_free; + rtUnlockMutex(_lock); + + if (!sem) { + for (uint32_t j = 0; j < i; ++j) { + uint32_t index = p_semaphores[j].index; + DestroySemaphore(&_semaphores[index]); + } + return RT_OUT_OF_MEMORY; + } + + sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION; + + VkSemaphoreTypeCreateInfo type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = info[i].initial_value, + }; + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = 
&type_info, + }; + + if (vkCreateSemaphore(g_gpu.device, &semaphore_info, g_gpu.alloc_cb, &sem->semaphore) != + VK_SUCCESS) { + for (uint32_t j = 0; j < i; ++j) { + uint32_t index = p_semaphores[j].index; + DestroySemaphore(&_semaphores[index]); + } + return RT_UNKNOWN_ERROR; + } + + p_semaphores[i].version = sem->version; + p_semaphores[i].index = (uint32_t)(sem - _semaphores); + } + return RT_SUCCESS; +} + +void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) { + for (uint32_t i = 0; i < count; ++i) { + uint32_t index = semaphores[i].index; + if (index >= (uint32_t)rt_VkMaxSemaphores.i) + continue; + if (semaphores[i].version != _semaphores[index].version) { + rtLog("vk", + "Tried to destroy semaphore %u with version %u, but the semaphore has version %u", + index, + semaphores[i].version, + _semaphores[index].version); + continue; + } + DestroySemaphore(&_semaphores[index]); + } +} + +VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) { + uint32_t index = handle.index; + if (!RT_IS_HANDLE_VALID(handle) || index >= (uint32_t)rt_VkMaxSemaphores.i) + return VK_NULL_HANDLE; + if (_semaphores[index].version != handle.version) + return VK_NULL_HANDLE; + return _semaphores[index].semaphore; +} + +uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) { + uint32_t index = semaphore.index; + if (!RT_IS_HANDLE_VALID(semaphore) || index >= (uint32_t)rt_VkMaxSemaphores.i) + return 0; + if (_semaphores[index].version != semaphore.version) + return 0; + vkGetSemaphoreCounterValue(g_gpu.device, + _semaphores[index].semaphore, + &_semaphores[index].current_value); + return _semaphores[index].current_value; +} \ No newline at end of file diff --git a/src/renderer/vk/gpu_sync.h b/src/renderer/vk/gpu_sync.h new file mode 100644 index 0000000..62de658 --- /dev/null +++ b/src/renderer/vk/gpu_sync.h @@ -0,0 +1,10 @@ +#ifndef RT_VK_GPU_SYNC_H +#define RT_VK_GPU_SYNC_H + +#include + +#include 
"runtime/renderer_api.h" + +VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle); + +#endif diff --git a/src/renderer/vk/init.c b/src/renderer/vk/init.c index 34d6d6c..c8afeae 100644 --- a/src/renderer/vk/init.c +++ b/src/renderer/vk/init.c @@ -549,6 +549,8 @@ extern rt_result InitRenderTargetManagement(void); extern void ShutdownRenderTargetManagement(void); extern rt_result InitCommandBufferManagement(void); extern void ShutdownCommandBufferManagement(void); +extern rt_result InitializeSempahoreManagement(void); +extern void ShutdownSemaphoreManagement(void); rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { rtLog("vk", "Init"); @@ -586,6 +588,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { if (res != RT_SUCCESS) return res; res = InitRenderTargetManagement(); + if (res != RT_SUCCESS) + return res; + res = InitializeSempahoreManagement(); if (res != RT_SUCCESS) return res; res = InitCommandBufferManagement(); @@ -603,6 +608,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) { vkDeviceWaitIdle(g_gpu.device); rtDestroySwapchain(); ShutdownCommandBufferManagement(); + ShutdownSemaphoreManagement(); ShutdownRenderTargetManagement(); ShutdownPipelineManagement(); DestroyAllocator(); diff --git a/src/renderer/vk/meson.build b/src/renderer/vk/meson.build index dd5191a..163951c 100644 --- a/src/renderer/vk/meson.build +++ b/src/renderer/vk/meson.build @@ -11,12 +11,14 @@ if vk_dep.found() # Project Sources 'command_buffers.h', 'gpu.h', + 'gpu_sync.h', 'pipelines.h', 'render_targets.h', 'swapchain.h', 'command_buffers.c', 'frame.c', + 'gpu_sync.c', 'helper.c', 'init.c', 'pipelines.c', diff --git a/src/runtime/gfx_main.c b/src/runtime/gfx_main.c index c288472..1b31b0f 100644 --- a/src/runtime/gfx_main.c +++ b/src/runtime/gfx_main.c @@ -36,9 +36,14 @@ extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t, rt_command_buffer_handle *); extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const 
rt_submit_command_buffers_info *); +extern rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t, + const rt_gpu_semaphore_info *, + rt_gpu_semaphore_handle *); +extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *); +extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle); #endif -extern rt_result InitFramegraphManager(void); + extern rt_result InitFramegraphManager(void); extern void ShutdownFramegraphManager(void); static bool LoadRenderer(void) { @@ -69,6 +74,9 @@ static bool LoadRenderer(void) { RETRIEVE_SYMBOL(DestroyRenderTarget, rt_destroy_render_target_fn); RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn); RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn); + RETRIEVE_SYMBOL(CreateSemaphores, rt_create_gpu_semaphores_fn); + RETRIEVE_SYMBOL(DestroySemaphores, rt_destroy_gpu_semaphores_fn); + RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn); } else { rtReportError("GFX", "Unsupported renderer backend: (%s) %s", @@ -88,6 +96,9 @@ static bool LoadRenderer(void) { g_renderer.DestroyRenderTarget = &rtRenDestroyRenderTarget; g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers; g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers; + g_renderer.CreateSemaphores = &rtRenCreateSemaphores; + g_renderer.DestroySemaphores = &rtRenDestroySemaphores; + g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue; #endif return true; } diff --git a/src/runtime/renderer_api.h b/src/runtime/renderer_api.h index e8655fa..6a0da71 100644 --- a/src/runtime/renderer_api.h +++ b/src/runtime/renderer_api.h @@ -120,12 +120,20 @@ typedef struct { typedef struct { const rt_command_buffer_handle *command_buffers; const rt_gpu_semaphore_handle *wait_semaphores; + const uint64_t *wait_values; const rt_gpu_semaphore_handle *signal_semaphores; + const uint64_t *signal_values; uint32_t command_buffer_count; uint32_t wait_semaphore_count; uint32_t 
signal_semaphore_count; } rt_submit_command_buffers_info; +typedef struct { + /* Optional, for debug purposes */ + const char *name; + uint64_t initial_value; +} rt_gpu_semaphore_info; + /* Renderer API */ typedef void rt_register_renderer_cvars_fn(void); @@ -139,7 +147,13 @@ typedef void rt_destroy_render_target_fn(rt_render_target_handle handle); typedef rt_result rt_alloc_command_buffers_fn(uint32_t count, const rt_alloc_command_buffer_info *info, rt_command_buffer_handle *p_command_buffers); -typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, const rt_submit_command_buffers_info *info); +typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, + const rt_submit_command_buffers_info *info); +typedef rt_result rt_create_gpu_semaphores_fn(uint32_t count, + const rt_gpu_semaphore_info *info, + rt_gpu_semaphore_handle *p_semaphores); +typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handle *semaphores); +typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore); typedef struct { rt_register_renderer_cvars_fn *RegisterCVars; @@ -152,6 +166,9 @@ typedef struct { rt_destroy_render_target_fn *DestroyRenderTarget; rt_alloc_command_buffers_fn *AllocCommandBuffers; rt_submit_command_buffers_fn *SubmitCommandBuffers; + rt_create_gpu_semaphores_fn *CreateSemaphores; + rt_destroy_gpu_semaphores_fn *DestroySemaphores; + rt_get_gpu_semaphore_value_fn *GetSemaphoreValue; } rt_renderer_api; #define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name