feat(vk): Semaphore pool
Some checks failed
Ubuntu Cross to Win64 / Cross Compile with ming64 (1.4.0, ubuntu-latest) (push) Failing after 1m38s
Ubuntu Cross to Win64 / Compile for linux (1.4.0, ubuntu-latest) (push) Successful in 1m25s

- Also adds atomic functions for linux (CAS & exchange)
- Also adds rt_bool32 type
This commit is contained in:
Kevin Trogant 2024-08-07 22:23:52 +02:00
parent 33e596c9d6
commit a6f3a04993
10 changed files with 235 additions and 5 deletions

View File

@ -629,6 +629,10 @@ rt_create_vk_device_result rtCreateVkDevice(const rt_renderer_window_info *info)
if ((res = bindless_registry_result.result) != RT_SUCCESS)
goto out;
dev.bindless_registry = bindless_registry_result.bindless_registry;
rt_create_vk_semaphore_pool_result semaphore_pool_result = rtCreateVkSemaphorePool(&dev, 128);
if ((res = semaphore_pool_result.result) != RT_SUCCESS)
goto out;
dev.semaphore_pool = semaphore_pool_result.semaphore_pool;
dev.created_pools = calloc(128 * 9, sizeof(VkCommandPool));
dev.created_pool_count = 0;

View File

@ -9,6 +9,7 @@
#include "physical_resource_manager.h"
#include "bindless_registry.h"
#include "command_buffers.h"
#include "semaphores.h"
#ifdef _WIN32
struct HINSTANCE__;
@ -78,6 +79,7 @@ typedef struct rt_vk_device {
/* *** Subsystems *** */
rt_vk_physical_resource_manager phys_res_mgr;
rt_vk_bindless_registry bindless_registry;
rt_vk_semaphore_pool semaphore_pool;
/* *** Save created command pools in a list to clean them up at exit */
VkCommandPool *created_pools;

View File

@ -17,13 +17,17 @@ if get_option('build_vk')
'command_buffers.h',
'device.h',
'physical_resource_manager.h',
'semaphores.h',
'utils.h',
'bindless_registry.c',
'command_buffers.c',
'device.c',
'init.c',
'physical_resource_manager.c',
'semaphores.c',
'swapchain.c',
'utils.c',
'vma_impl.cpp',
'../../../contrib/volk/volk.c',

View File

@ -0,0 +1,3 @@
#include <volk/volk.h>
#include <runtime/runtime.h>

View File

@ -0,0 +1,115 @@
#include "semaphores.h"
#include "device.h"
#include "utils.h"
#include <runtime/atomics.h>
#include <stdlib.h>
/* Creates a pool of `initial_size` timeline semaphores on `dev`.
 *
 * Every semaphore starts with value 0 and is marked as not in use.
 *
 * Returns a result whose `result` field is RT_SUCCESS and whose `semaphore_pool` field
 * holds the new pool. On failure, `result` holds the error code and everything that was
 * created up to that point has been destroyed again:
 *  - RT_UNKNOWN_ERROR if `initial_size` is 0 or the resize lock could not be created,
 *  - RT_OUT_OF_MEMORY if the semaphore array could not be allocated,
 *  - the translated VkResult if creating one of the semaphores failed. */
rt_create_vk_semaphore_pool_result rtCreateVkSemaphorePool(rt_vk_device *dev, uint32_t initial_size) {
    /* rtAcquireSemaphore wraps its running index with `% size`; an empty pool would make
     * that a division by zero. */
    if (initial_size == 0)
        return (rt_create_vk_semaphore_pool_result){.result = RT_UNKNOWN_ERROR};

    rt_vk_semaphore *semaphores = calloc(initial_size, sizeof *semaphores);
    if (!semaphores)
        return (rt_create_vk_semaphore_pool_result){.result = RT_OUT_OF_MEMORY};

    rt_result result = RT_SUCCESS;
    uint32_t created  = 0; /* Number of semaphores that must be destroyed on failure */
    for (; created < initial_size; ++created) {
        VkSemaphoreTypeCreateInfo type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = 0,
        };
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .flags = 0,
            .pNext = &type_info,
        };
        VkResult res = vkCreateSemaphore(dev->device,
                                         &semaphore_info,
                                         dev->alloc_cb,
                                         &semaphores[created].semaphore);
        if (res != VK_SUCCESS) {
            result = rtVkResultToRTResult(res);
            goto fail;
        }
        semaphores[created].value  = 0;
        semaphores[created].in_use = RT_FALSE;
    }

    /* The lock is created last, so that no failure path above has to dispose of it. */
    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok) {
        result = RT_UNKNOWN_ERROR;
        goto fail;
    }

    return (rt_create_vk_semaphore_pool_result){
        .result = RT_SUCCESS,
        .semaphore_pool =
            {
                .dev           = dev,
                .semaphores    = semaphores,
                .size          = initial_size,
                .acquire_index = 0,
                .resize_lock   = lock_res.lock,
            },
    };

fail:
    /* Roll back exactly the semaphores that were successfully created. */
    for (uint32_t j = 0; j < created; ++j)
        vkDestroySemaphore(dev->device, semaphores[j].semaphore, dev->alloc_cb);
    free(semaphores);
    return (rt_create_vk_semaphore_pool_result){.result = result};
}
/* Destroys every Vulkan semaphore stored in the pool and frees the array holding them.
 *
 * NOTE(review): pool->resize_lock is not touched here - confirm whether the lock needs
 * to be destroyed separately. */
void rtDestroyVkSemaphorePool(rt_vk_semaphore_pool *pool) {
    uint32_t idx = 0;
    while (idx < pool->size) {
        rt_vk_semaphore *entry = &pool->semaphores[idx];
        vkDestroySemaphore(pool->dev->device, entry->semaphore, pool->dev->alloc_cb);
        ++idx;
    }
    free(pool->semaphores);
}
/* Hands out a currently unused semaphore from the pool, growing the pool when necessary.
 *
 * A slot is chosen by atomically incrementing the pool's running acquire index and
 * wrapping it by the pool size. The slot is claimed with a compare-and-swap on its
 * in_use flag. If the chosen slot is already taken, the pool is doubled in size under
 * the write lock and the acquisition is attempted again.
 *
 * Returns the acquired semaphore, or NULL if the pool had to grow and either the array
 * could not be reallocated or a new Vulkan semaphore could not be created. An error is
 * reported in both cases and the pool keeps its previous size.
 *
 * NOTE(review): growing uses realloc, which may move the array. Pointers handed out by
 * earlier calls would then refer to the old storage - confirm how callers are expected
 * to cope with that. */
rt_vk_semaphore *rtAcquireSemaphore(rt_vk_semaphore_pool *pool) {
    for (;;) {
        rtLockRead(&pool->resize_lock);
        uint32_t index = rtAtomic32Inc(&pool->acquire_index) % pool->size;
        if (rtAtomic32CAS(&pool->semaphores[index].in_use, RT_TRUE, RT_FALSE) == RT_FALSE) {
            /* Successfully acquired the semaphore. Take its address while the read lock
             * still guarantees that the array is not being reallocated. */
            rt_vk_semaphore *acquired = &pool->semaphores[index];
            rtUnlockRead(&pool->resize_lock);
            return acquired;
        }

        /* We need to resize the pool */
        uint32_t pre_resize_size = pool->size;
        rtUnlockRead(&pool->resize_lock);

        rtLockWrite(&pool->resize_lock);
        if (pool->size > pre_resize_size) {
            /* Someone else was faster. Just try again */
            rtUnlockWrite(&pool->resize_lock);
            continue;
        }

        uint32_t new_size    = pre_resize_size * 2;
        rt_vk_semaphore *tmp = realloc(pool->semaphores, sizeof(rt_vk_semaphore) * new_size);
        if (!tmp) {
            rtUnlockWrite(&pool->resize_lock);
            rtReportError("VK", "Tried to grow the semaphore pool, but ran out of memory.");
            return NULL;
        }
        /* The array may have moved. pool->size is only raised once all new semaphores
         * exist, so a failure below leaves the pool usable at its old size. */
        pool->semaphores = tmp;

        /* Create new semaphores */
        for (uint32_t i = pre_resize_size; i < new_size; ++i) {
            VkSemaphoreTypeCreateInfo type_info = {
                .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
                .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
                .initialValue  = 0,
            };
            VkSemaphoreCreateInfo semaphore_info = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                .flags = 0,
                .pNext = &type_info,
            };
            VkResult res = vkCreateSemaphore(pool->dev->device,
                                             &semaphore_info,
                                             pool->dev->alloc_cb,
                                             &pool->semaphores[i].semaphore);
            if (res != VK_SUCCESS) {
                /* Roll back the semaphores created during this growth attempt. */
                for (uint32_t j = pre_resize_size; j < i; ++j) {
                    vkDestroySemaphore(pool->dev->device,
                                       pool->semaphores[j].semaphore,
                                       pool->dev->alloc_cb);
                }
                rtUnlockWrite(&pool->resize_lock);
                rtReportError("VK", "Tried to grow the semaphore pool, but failed to create new semaphores");
                return NULL;
            }
            pool->semaphores[i].value  = 0;
            pool->semaphores[i].in_use = RT_FALSE;
        }

        pool->size = new_size;
        rtUnlockWrite(&pool->resize_lock);
        /* Loop around and retry the acquisition with the larger pool. */
    }
}
/* Marks a semaphore as free again by atomically storing RT_FALSE into its in_use flag.
 * The semaphore's value field is left untouched. */
void rtReleaseSemaphore(rt_vk_semaphore *semaphore) {
    /* The previous flag value is of no interest, so the exchange result is discarded. */
    (void)rtAtomic32Exchange(&semaphore->in_use, RT_FALSE);
}

View File

@ -0,0 +1,47 @@
#ifndef RT_VK_SEMAPHORES_H
#define RT_VK_SEMAPHORES_H
#include <volk/volk.h>
#include <runtime/threading.h>
struct rt_vk_device;
/* A single pool entry: a Vulkan timeline semaphore plus its bookkeeping data. */
typedef struct {
/* A timeline semaphore */
VkSemaphore semaphore;
/* Its current value, or the next value it will signal */
uint64_t value;
/* RT_TRUE while the semaphore is handed out, RT_FALSE while it is available.
 * Claimed via atomic compare-and-swap in rtAcquireSemaphore and cleared via atomic
 * exchange in rtReleaseSemaphore. */
rt_bool32 in_use;
} rt_vk_semaphore;
/* A growable array of timeline semaphores that can be acquired and released. */
typedef struct {
/* Device the semaphores are created on and destroyed with */
struct rt_vk_device *dev;
/* Heap-allocated array of `size` entries. May be reallocated when the pool grows */
rt_vk_semaphore *semaphores;
/* Number of semaphores inside the pool */
uint32_t size;
/* Running index of acquire operations. Atomically incremented to fetch a "new" semaphore */
uint32_t acquire_index;
/* Locked as writing when resizing the array. During normal usage, this gets a read lock */
rt_rwlock resize_lock;
} rt_vk_semaphore_pool;
/* Return value of rtCreateVkSemaphorePool. */
typedef struct {
/* RT_SUCCESS if the pool was created, an error code otherwise */
rt_result result;
/* The created pool. Only populated if result is RT_SUCCESS */
rt_vk_semaphore_pool semaphore_pool;
} rt_create_vk_semaphore_pool_result;
/* Creates a pool holding initial_size timeline semaphores on the given device.
 * On failure, the result field holds the error code and semaphore_pool is not populated. */
rt_create_vk_semaphore_pool_result rtCreateVkSemaphorePool(struct rt_vk_device *device, uint32_t initial_size);
/* Destroys all semaphores inside the pool and frees the array that holds them. */
void rtDestroyVkSemaphorePool(rt_vk_semaphore_pool *pool);
/* Returns a semaphore that is not in use and marks it as in use. The pool grows if needed.
 * Returns NULL if the pool has to grow and the memory for that cannot be allocated. */
rt_vk_semaphore *rtAcquireSemaphore(rt_vk_semaphore_pool *pool);
/* Marks the semaphore as no longer in use, so that it can be acquired again. */
void rtReleaseSemaphore(rt_vk_semaphore *semaphore);
#endif

12
src/renderer/vk/utils.c Normal file
View File

@ -0,0 +1,12 @@
#include "utils.h"
/* Translates a Vulkan result code into the runtime's rt_result.
 *
 * VK_SUCCESS maps to RT_SUCCESS and VK_ERROR_OUT_OF_HOST_MEMORY maps to RT_OUT_OF_MEMORY.
 * Every other code is reported as RT_UNKNOWN_ERROR. */
rt_result rtVkResultToRTResult(VkResult result) {
    if (result == VK_SUCCESS)
        return RT_SUCCESS;
    if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
        return RT_OUT_OF_MEMORY;
    /* No dedicated mapping exists for anything else. */
    return RT_UNKNOWN_ERROR;
}

9
src/renderer/vk/utils.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef RT_VK_UTILS_H
#define RT_VK_UTILS_H
#include <runtime/runtime.h>
#include <volk/volk.h>
/* Converts a VkResult into the corresponding rt_result.
 * Codes without a dedicated mapping are returned as RT_UNKNOWN_ERROR. */
rt_result rtVkResultToRTResult(VkResult result);
#endif

View File

@ -23,7 +23,8 @@
_InterlockedExchange_rel((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
_InterlockedCompareExchange_acq((volatile long *)(_pDest), (_NewVal), (_Compare))
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) _InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
_InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare))
#else
/* x64/86 does not have acquire/release versions of these */
#define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
@ -32,7 +33,8 @@
_InterlockedExchange((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
_InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
_InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
#endif
#define rtAtomic32Exchange(_pDest, _NewVal) \
_InterlockedExchange((volatile long *)(_pDest), (_NewVal))
@ -49,9 +51,37 @@
#define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
#define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
/* TODO Linux versions of compare exchange
https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
*/
/* Generates a compare-and-swap wrapper named _Name for the type _T.
 *
 * The generated function stores new_val into *dest if *dest equals compare, using
 * memorder for the successful exchange and __ATOMIC_ACQUIRE for the failure case.
 * Unlike the raw builtin, which returns a success flag, the wrapper returns the value
 * *dest held before the operation, matching the API the rest of the code expects. */
#define __RT_POSIX_CAS_N_IMPL(_T, _Name)                                                     \
    static RT_INLINE _T _Name(volatile _T *dest, _T new_val, _T compare, int memorder) {     \
        _T observed = compare;                                                               \
        /* On success `observed` keeps the expected value, which equals the old *dest. */    \
        /* On failure the builtin overwrites it with the value actually found. */            \
        (void)__atomic_compare_exchange_n(dest,                                              \
                                          &observed,                                         \
                                          new_val,                                           \
                                          false,                                             \
                                          memorder,                                          \
                                          __ATOMIC_ACQUIRE);                                 \
        return observed;                                                                     \
    }
/* Instantiate the CAS wrapper for every operand type the rtAtomic32CAS* macros dispatch on.
 * NOTE(review): `long` is wider than 32 bits on LP64 targets - confirm that the long
 * variant is intended to be reachable through the "32" macros. */
__RT_POSIX_CAS_N_IMPL(int, __rtPOSIXCASInt)
__RT_POSIX_CAS_N_IMPL(long, __rtPOSIXCASLong)
__RT_POSIX_CAS_N_IMPL(uint32_t, __rtPOSIXCASUint32)
/* Atomically stores _NewVal into *_pDest with acquire ordering; evaluates to the previous value. */
#define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
__atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_ACQUIRE)
/* Atomically stores _NewVal into *_pDest with release ordering; evaluates to the previous value. */
#define rtAtomic32ExchangeRel(_pDest, _NewVal) \
__atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_RELEASE)
/* Compare-and-swap with acquire ordering on success. Evaluates to the original value of *_pDest.
 * The wrapper is selected by the type of _NewVal (not of _pDest), so _NewVal must have the
 * same type as the object _pDest points to. */
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
_Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_ACQUIRE)
/* Compare-and-swap with release ordering on success. Evaluates to the original value of *_pDest.
 * The failure ordering inside the wrapper is always __ATOMIC_ACQUIRE. */
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
_Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_RELEASE)
/* Sequentially consistent exchange; evaluates to the previous value of *_pDest. */
#define rtAtomic32Exchange(_pDest, _NewVal) \
__atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_SEQ_CST)
/* Sequentially consistent compare-and-swap. Evaluates to the original value of *_pDest,
 * which equals _Compare exactly when the swap took place. */
#define rtAtomic32CAS(_pDest, _NewVal, _Compare) \
_Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_SEQ_CST)
#endif

View File

@ -87,6 +87,10 @@ typedef struct {
unsigned int length;
} rt_text_span;
/* Boolean stored in a 32-bit unsigned integer. Use RT_TRUE and RT_FALSE as its values. */
typedef uint32_t rt_bool32;
/* Unsigned literals, so that their type matches rt_bool32 where uint32_t is unsigned int. */
#define RT_TRUE 1u
#define RT_FALSE 0u
/* snprintf replacement.
* Always returns a zero terminated string.
*/