Creating timeline semaphores

- Similar to D3D12 fences.
- More flexible than Vulkan 1.0 binary semaphores.
This commit is contained in:
Kevin Trogant 2024-02-16 15:31:23 +01:00
parent 9b162379d4
commit e989c2b406
8 changed files with 240 additions and 21 deletions

View File

@ -1,4 +1,5 @@
#include "gpu.h"
#include "gpu_sync.h"
#include "swapchain.h"
#include "runtime/atomics.h"
@ -233,12 +234,48 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
rt_result result = RT_SUCCESS;
VkQueue target_queue = rtGetQueue(queue);
VkCommandBuffer *command_buffers = RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBuffer, count);
VkCommandBufferSubmitInfo *command_buffers =
RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count);
if (!command_buffers) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *wait_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->wait_semaphore_count);
if (!wait_semaphores) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count);
if (!signal_semaphores) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = info->wait_semaphore_count;
uint32_t signal_count = info->signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->wait_semaphores[i]),
.value = info->wait_values[i],
.stageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphores[i] = semaphore_info;
}
/* Translate the caller's signal semaphores into Vulkan submit infos.
 * The entries must land in signal_semaphores[]: that is the array handed to
 * pSignalSemaphoreInfos. Writing them into wait_semaphores[] would clobber
 * the wait infos built above and leave the signal array uninitialized. */
for (uint32_t i = 0; i < signal_count; ++i) {
    VkSemaphoreSubmitInfo semaphore_info = {
        .sType       = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore   = rtGetSemaphore(info->signal_semaphores[i]),
        .value       = info->signal_values[i],
        .stageMask   = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
        .deviceIndex = 0,
    };
    signal_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < count; ++i) {
uint32_t slot = info->command_buffers[i].index;
if (_command_buffers[slot].version != info->command_buffers[i].version) {
@ -253,27 +290,23 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
result = RT_INVALID_VALUE;
goto out;
}
command_buffers[i] = _command_buffers[slot].command_buffer;
command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
command_buffers[i].pNext = NULL;
command_buffers[i].deviceMask = 0;
command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer;
}
/* TODO(Kevin): Retrieve semaphores */
VkSemaphore *wait_semaphores = NULL;
VkSemaphore *signal_semaphores = NULL;
uint32_t wait_count = 0;
uint32_t signal_count = 0;
VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pCommandBuffers = command_buffers,
.commandBufferCount = count,
.pWaitSemaphores = wait_semaphores,
.pWaitDstStageMask = NULL,
.waitSemaphoreCount = wait_count,
.pSignalSemaphores = signal_semaphores,
.signalSemaphoreCount = signal_count,
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphores,
.pSignalSemaphoreInfos = signal_semaphores,
.commandBufferInfoCount = count,
.pCommandBufferInfos = command_buffers,
};
if (vkQueueSubmit(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}

View File

@ -5,4 +5,5 @@
void rtResetCommandPools(unsigned int frame_id);
#endif

139
src/renderer/vk/gpu_sync.c Normal file
View File

@ -0,0 +1,139 @@
#include "gpu.h"
#include "runtime/renderer_api.h"
#include "runtime/config.h"
#include "runtime/threading.h"
#include "runtime/handles.h"
#include <stdlib.h>
/* Number of slots in the semaphore pool. Slot 0 is reserved for the invalid
 * handle, so one slot fewer is actually available for semaphores. */
RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 1024);
/* One slot of the semaphore pool. A slot is either in use (it owns a Vulkan
 * semaphore) or linked into the free list via next_free. */
typedef struct rt_gpu_semaphore_s {
/* Bumped every time the slot is handed out by CreateSemaphores; handles carry
 * the version they were created with and are rejected on mismatch. */
uint32_t version;
/* The Vulkan semaphore owned by this slot; reset to VK_NULL_HANDLE when the
 * semaphore is destroyed. */
VkSemaphore semaphore;
/* Counter value written by the most recent GetSemaphoreValue query. */
uint64_t current_value;
/* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a not-signaled semaphore. */
/* Next slot in the free list (NULL terminates the list). */
struct rt_gpu_semaphore_s *next_free;
} rt_gpu_semaphore;
/* Pool storage; allocated by InitializeSempahoreManagement. */
static rt_gpu_semaphore *_semaphores;
/* Head of the free list of unused slots; NULL when the pool is exhausted. */
static rt_gpu_semaphore *_first_free;
/* Held while reading or modifying the free list. */
static rt_mutex *_lock;
/* Destroys the Vulkan semaphore owned by the given pool slot and puts the
 * slot back at the head of the free list. The slot's version is not touched
 * here; it is only bumped when the slot is handed out again. */
static void DestroySemaphore(rt_gpu_semaphore *sem)
{
    /* Release the Vulkan object first and clear the handle so the slot no
     * longer refers to a dead semaphore. */
    vkDestroySemaphore(g_gpu.device, sem->semaphore, g_gpu.alloc_cb);
    sem->semaphore = VK_NULL_HANDLE;

    /* Free-list manipulation happens under the pool lock. */
    rtLockMutex(_lock);
    sem->next_free = _first_free;
    _first_free    = sem;
    rtUnlockMutex(_lock);
}
/* Allocates the semaphore pool and builds the free list.
 *
 * The pool has rt_VkMaxSemaphores slots. Slot 0 is never handed out because
 * index 0 is reserved for the invalid handle, so the free list starts at
 * slot 1.
 *
 * Returns RT_SUCCESS on success, RT_OUT_OF_MEMORY if the pool could not be
 * allocated, and RT_UNKNOWN_ERROR if the configured pool size is unusable or
 * the mutex could not be created. */
rt_result InitializeSempahoreManagement(void) {
    const int slot_count = rt_VkMaxSemaphores.i;

    /* With fewer than two slots there is no room for even one semaphore next
     * to the reserved slot 0, and the indexing below would run out of bounds.
     * A negative value must also never reach calloc as a huge size_t. */
    if (slot_count < 2)
        return RT_UNKNOWN_ERROR;

    _semaphores = calloc((size_t)slot_count, sizeof *_semaphores);
    if (!_semaphores)
        return RT_OUT_OF_MEMORY;

    _lock = rtCreateMutex();
    if (!_lock) {
        free(_semaphores);
        _semaphores = NULL; /* do not leave a dangling pool pointer behind */
        return RT_UNKNOWN_ERROR;
    }

    /* Keep 0 unused for the invalid handle */
    for (int i = 1; i < slot_count - 1; ++i)
        _semaphores[i].next_free = &_semaphores[i + 1];
    _semaphores[slot_count - 1].next_free = NULL;
    _first_free = &_semaphores[1];
    return RT_SUCCESS;
}
void ShutdownSemaphoreManagement(void) {
for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) {
vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore, g_gpu.alloc_cb);
}
}
/* Creates `count` timeline semaphores.
 *
 * info[i].initial_value is used as the initial counter value of the i-th
 * semaphore; the resulting handle is written to p_semaphores[i].
 *
 * The operation is all-or-nothing: if the pool runs out of slots
 * (RT_OUT_OF_MEMORY) or Vulkan fails to create a semaphore
 * (RT_UNKNOWN_ERROR), every semaphore created by this call is destroyed
 * again and the handles already written to p_semaphores must not be used. */
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count, const rt_gpu_semaphore_info *info, rt_gpu_semaphore_handle *p_semaphores) {
    rt_result result = RT_SUCCESS;
    uint32_t created = 0;

    for (; created < count; ++created) {
        /* Pop a slot off the free list. */
        rtLockMutex(_lock);
        rt_gpu_semaphore *sem = _first_free;
        if (sem)
            _first_free = sem->next_free;
        rtUnlockMutex(_lock);

        if (!sem) {
            result = RT_OUT_OF_MEMORY;
            goto rollback;
        }

        /* Bump the version before creating the object so that handles to the
         * slot's previous occupant stop resolving. */
        sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;

        VkSemaphoreTypeCreateInfo type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = info[created].initial_value,
        };
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &type_info,
        };
        if (vkCreateSemaphore(g_gpu.device, &semaphore_info, g_gpu.alloc_cb, &sem->semaphore) !=
            VK_SUCCESS) {
            /* The slot was already taken off the free list but owns no
             * semaphore; hand it back so it is not lost for good. */
            sem->semaphore = VK_NULL_HANDLE;
            rtLockMutex(_lock);
            sem->next_free = _first_free;
            _first_free    = sem;
            rtUnlockMutex(_lock);

            result = RT_UNKNOWN_ERROR;
            goto rollback;
        }

        p_semaphores[created].version = sem->version;
        p_semaphores[created].index   = (uint32_t)(sem - _semaphores);
    }
    return RT_SUCCESS;

rollback:
    /* Undo the semaphores that were fully created before the failure. */
    for (uint32_t j = 0; j < created; ++j) {
        uint32_t index = p_semaphores[j].index;
        DestroySemaphore(&_semaphores[index]);
    }
    return result;
}
/* Destroys the semaphores referenced by the given handles.
 *
 * Handles that are invalid, out of range, stale (version mismatch) or that
 * refer to an already destroyed semaphore are skipped; a version mismatch is
 * additionally logged. */
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t index = semaphores[i].index;

        /* Slot 0 is reserved for the invalid handle. It must never be pushed
         * onto the free list, otherwise CreateSemaphores would start handing
         * out index 0. */
        if (!RT_IS_HANDLE_VALID(semaphores[i]) || index == 0 ||
            index >= (uint32_t)rt_VkMaxSemaphores.i)
            continue;

        if (semaphores[i].version != _semaphores[index].version) {
            rtLog("vk",
                  "Tried to destroy semaphore %u with version %u, but the semaphore has version %u",
                  index,
                  semaphores[i].version,
                  _semaphores[index].version);
            continue;
        }

        /* The version is only bumped on creation, so a handle still matches
         * after its semaphore was destroyed. Destroying it again would link
         * the slot into the free list twice and corrupt the list. */
        if (_semaphores[index].semaphore == VK_NULL_HANDLE)
            continue;

        DestroySemaphore(&_semaphores[index]);
    }
}
/* Resolves a semaphore handle to the Vulkan semaphore stored in its pool
 * slot. Returns VK_NULL_HANDLE if the handle is invalid, its index is outside
 * the pool, or its version does not match the slot's current version. */
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle)
{
    const uint32_t slot = handle.index;

    /* Only touch the pool once the handle is known to be in range. */
    if (RT_IS_HANDLE_VALID(handle) && slot < (uint32_t)rt_VkMaxSemaphores.i) {
        if (_semaphores[slot].version == handle.version)
            return _semaphores[slot].semaphore;
    }
    return VK_NULL_HANDLE;
}
/* Queries the current counter value of a timeline semaphore.
 *
 * Returns 0 if the handle is invalid, out of range, stale, or refers to a
 * semaphore that has already been destroyed. If the Vulkan query itself
 * fails, the value from the last successful query is returned instead. */
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) {
    uint32_t index = semaphore.index;
    if (!RT_IS_HANDLE_VALID(semaphore) || index >= (uint32_t)rt_VkMaxSemaphores.i)
        return 0;
    if (_semaphores[index].version != semaphore.version)
        return 0;

    /* A destroyed slot keeps its version until it is reused, so the version
     * check alone does not guarantee a live Vulkan object. */
    if (_semaphores[index].semaphore == VK_NULL_HANDLE)
        return 0;

    /* Query into a local so a failed call cannot scribble over the cache. */
    uint64_t value = 0;
    if (vkGetSemaphoreCounterValue(g_gpu.device, _semaphores[index].semaphore, &value) ==
        VK_SUCCESS) {
        _semaphores[index].current_value = value;
    }
    return _semaphores[index].current_value;
}

View File

@ -0,0 +1,10 @@
/* Internal interface of the Vulkan backend's semaphore pool. */
#ifndef RT_VK_GPU_SYNC_H
#define RT_VK_GPU_SYNC_H
#include <volk/volk.h>
#include "runtime/renderer_api.h"
/* Resolves a semaphore handle to its VkSemaphore.
 * Returns VK_NULL_HANDLE if the handle is invalid, out of range, or its
 * version does not match the version currently stored in the pool slot. */
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle);
#endif

View File

@ -549,6 +549,8 @@ extern rt_result InitRenderTargetManagement(void);
extern void ShutdownRenderTargetManagement(void);
extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
extern rt_result InitializeSempahoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
rtLog("vk", "Init");
@ -586,6 +588,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
if (res != RT_SUCCESS)
return res;
res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitializeSempahoreManagement();
if (res != RT_SUCCESS)
return res;
res = InitCommandBufferManagement();
@ -603,6 +608,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
vkDeviceWaitIdle(g_gpu.device);
rtDestroySwapchain();
ShutdownCommandBufferManagement();
ShutdownSemaphoreManagement();
ShutdownRenderTargetManagement();
ShutdownPipelineManagement();
DestroyAllocator();

View File

@ -11,12 +11,14 @@ if vk_dep.found()
# Project Sources
'command_buffers.h',
'gpu.h',
'gpu_sync.h',
'pipelines.h',
'render_targets.h',
'swapchain.h',
'command_buffers.c',
'frame.c',
'gpu_sync.c',
'helper.c',
'init.c',
'pipelines.c',

View File

@ -36,6 +36,11 @@ extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
rt_command_buffer_handle *);
extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
const rt_submit_command_buffers_info *);
extern rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t,
const rt_gpu_semaphore_info *,
rt_gpu_semaphore_handle *);
extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *);
extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle);
#endif
extern rt_result InitFramegraphManager(void);
@ -69,6 +74,9 @@ static bool LoadRenderer(void) {
RETRIEVE_SYMBOL(DestroyRenderTarget, rt_destroy_render_target_fn);
RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn);
RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn);
RETRIEVE_SYMBOL(CreateSemaphores, rt_create_gpu_semaphores_fn);
RETRIEVE_SYMBOL(DestroySemaphores, rt_destroy_gpu_semaphores_fn);
RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn);
} else {
rtReportError("GFX",
"Unsupported renderer backend: (%s) %s",
@ -88,6 +96,9 @@ static bool LoadRenderer(void) {
g_renderer.DestroyRenderTarget = &rtRenDestroyRenderTarget;
g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers;
g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers;
g_renderer.CreateSemaphores = &rtRenCreateSemaphores;
g_renderer.DestroySemaphores = &rtRenDestroySemaphores;
g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue;
#endif
return true;
}

View File

@ -120,12 +120,20 @@ typedef struct {
/* Describes one submission for SubmitCommandBuffers: the command buffers to
 * execute plus the semaphores to wait on and to signal. */
typedef struct {
/* Handles of the command buffers to submit. */
const rt_command_buffer_handle *command_buffers;
/* Semaphores to wait on; wait_semaphore_count entries. */
const rt_gpu_semaphore_handle *wait_semaphores;
/* Value to wait for on each wait semaphore; wait_values[i] belongs to
 * wait_semaphores[i]. */
const uint64_t *wait_values;
/* Semaphores to signal; signal_semaphore_count entries. */
const rt_gpu_semaphore_handle *signal_semaphores;
/* Value to signal on each signal semaphore; signal_values[i] belongs to
 * signal_semaphores[i]. */
const uint64_t *signal_values;
/* Presumably the number of entries in command_buffers; the code reading
 * this field is not visible here. TODO confirm */
uint32_t command_buffer_count;
/* Number of entries in wait_semaphores and wait_values. */
uint32_t wait_semaphore_count;
/* Number of entries in signal_semaphores and signal_values. */
uint32_t signal_semaphore_count;
} rt_submit_command_buffers_info;
/* Creation parameters for one GPU semaphore, passed to CreateSemaphores. */
typedef struct {
/* Optional, for debug purposes */
const char *name;
/* Counter value the semaphore starts with; the Vulkan backend passes it as
 * the initial value of the timeline semaphore. */
uint64_t initial_value;
} rt_gpu_semaphore_info;
/* Renderer API */
typedef void rt_register_renderer_cvars_fn(void);
@ -139,7 +147,13 @@ typedef void rt_destroy_render_target_fn(rt_render_target_handle handle);
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers);
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, const rt_submit_command_buffers_info *info);
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info);
typedef rt_result rt_create_gpu_semaphores_fn(uint32_t count,
const rt_gpu_semaphore_info *info,
rt_gpu_semaphore_handle *p_semaphores);
typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handle *semaphores);
typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore);
typedef struct {
rt_register_renderer_cvars_fn *RegisterCVars;
@ -152,6 +166,9 @@ typedef struct {
rt_destroy_render_target_fn *DestroyRenderTarget;
rt_alloc_command_buffers_fn *AllocCommandBuffers;
rt_submit_command_buffers_fn *SubmitCommandBuffers;
rt_create_gpu_semaphores_fn *CreateSemaphores;
rt_destroy_gpu_semaphores_fn *DestroySemaphores;
rt_get_gpu_semaphore_value_fn *GetSemaphoreValue;
} rt_renderer_api;
#define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name