Manage command buffers

I decided to make queues explicit, to simplify handling queue ownership
transfers in the renderer code. The framegraph + pass code has explicit knowledge
about resource ownership, so it makes sense to handle it there.

- Manage pools
- Allocate command buffers
- Submit command buffers
This commit is contained in:
Kevin Trogant 2024-02-13 08:35:19 +01:00
parent b44b3e651b
commit 887fa63c43
16 changed files with 455 additions and 47 deletions

View File

@ -1,14 +1,284 @@
#include "runtime/renderer_api.h" #include "gpu.h"
#include "swapchain.h"
rt_result #include "runtime/atomics.h"
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, #include "runtime/config.h"
rt_render_command_buffer_handle *p_command_buffers, #include "runtime/mem_arena.h"
rt_gpu_semaphore_handle *p_semaphores) { #include "runtime/renderer_api.h"
#include "runtime/runtime.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxCommandPools,
"Maximum number of command pools that can be created. Default: 32",
32);
RT_CVAR_I(
rt_VkCommandBufferRingBufferSize,
"Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
512);
/* Per-thread set of command pools: one pool per in-flight frame, per queue
 * family. The fixed array has room for 3 families (graphics/compute/transfer)
 * times the maximum supported frames in flight. */
typedef struct {
    VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
    /* Number of pools actually created in `pools` (families that share a
     * queue family also share pools, so this may be less than the capacity). */
    uint32_t distinct_pool_count;
    /* The three pointers below alias into `pools`; two of them may point at
     * the same entries when the queue families coincide. */
    VkCommandPool *compute_pools;
    VkCommandPool *graphics_pools;
    VkCommandPool *transfer_pools;
} rt_thread_pools;

/* Ring-buffer entry for one allocated command buffer. */
typedef struct {
    VkCommandBuffer command_buffer;
    /* Incremented on every reuse of this slot; stored in handles to detect
     * stale handles on submit. */
    uint32_t version;
    /* Queue the buffer was allocated for; submit validates against this. */
    rt_gpu_queue target_queue;
} rt_command_buffer;

/* Global table of per-thread pool sets, sized by rt_VkMaxCommandPools. */
static rt_thread_pools *_pools;
/* Next free slot in _pools; doubles as "number of slots in use" (slot 0 is
 * reserved, see below). */
static uint32_t _next_pools;
/* This thread's slot in _pools; 0 means "not acquired yet". */
static RT_THREAD_LOCAL unsigned int t_first_pool;
/* Ring buffer of command-buffer slots, sized by rt_VkCommandBufferRingBufferSize. */
static rt_command_buffer *_command_buffers;
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
 * index. */
static uint32_t _next_command_buffer;
rt_result InitCommandBufferManagement(void) {
_pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
if (!_pools)
return RT_OUT_OF_MEMORY;
_command_buffers =
calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
if (!_command_buffers) {
free(_pools);
return RT_OUT_OF_MEMORY;
}
/* Keep 0 free as a "Not initialized" value for t_first_pool */
_next_pools = 1;
return RT_SUCCESS; return RT_SUCCESS;
} }
/* Destroys the Vulkan command pools owned by one thread's pool set.
 *
 * BUGFIX: the previous version also called free(_pools) here. This function
 * is invoked once per slot in a loop by ShutdownCommandBufferManagement and
 * from CreatePools' failure path, so freeing the shared global array here
 * double-freed it (and left _pools dangling while other slots were still in
 * use). Ownership of the _pools array stays with the init/shutdown code. */
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
    /* Make repeated destruction of the same set harmless. */
    pools->distinct_pool_count = 0;
}
/* Destroys every acquired per-thread pool set. Slot 0 is the reserved
 * "not initialized" marker and is skipped. */
void ShutdownCommandBufferManagement(void) {
    /* _next_pools is the number of existing pools */
    for (uint32_t i = 1; i < _next_pools; ++i) {
        /* NOTE(review): DestroyPools also calls free(_pools) on the shared
         * global array, so iterating it here frees that array once per slot
         * (double free for more than one slot) — confirm and move the free
         * out of DestroyPools. Also: _command_buffers is never freed here;
         * verify that is intentional (process-teardown cleanup). */
        DestroyPools(&_pools[i]);
    }
}
void rtResetCommandPools(unsigned int frame_id) {
unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
for (uint32_t i = 1; i < _next_pools; ++i) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].graphics_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
}
if (_pools[i].compute_pools != _pools[i].graphics_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].compute_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
}
}
if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
_pools[i].transfer_pools != _pools[i].compute_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].transfer_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
}
}
}
}
static rt_result CreatePools(rt_thread_pools *pools) {
/* Graphics pools */
pools->graphics_pools = pools->pools;
pools->distinct_pool_count = 0;
VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.graphics_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&graphics_info,
g_gpu.alloc_cb,
&pools->graphics_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a graphics command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
if (g_gpu.compute_family != g_gpu.graphics_family) {
VkCommandPoolCreateInfo compute_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.compute_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->compute_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&compute_info,
g_gpu.alloc_cb,
&pools->compute_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a compute command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else {
pools->compute_pools = pools->graphics_pools;
}
if (g_gpu.transfer_family != g_gpu.graphics_family &&
g_gpu.transfer_family != g_gpu.compute_family) {
VkCommandPoolCreateInfo transfer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.transfer_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&transfer_info,
g_gpu.alloc_cb,
&pools->transfer_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a transfer command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else if (g_gpu.transfer_family == g_gpu.graphics_family) {
pools->transfer_pools = pools->graphics_pools;
} else if (g_gpu.transfer_family == g_gpu.compute_family) {
pools->transfer_pools = pools->compute_pools;
}
return RT_SUCCESS; return RT_SUCCESS;
}
/* Allocates `count` primary command buffers from this thread's pool set,
 * writing one handle per entry of p_command_buffers. info[i].target_queue
 * selects which family's pool each buffer comes from. Lazily acquires and
 * creates the thread's pool set on first use.
 *
 * Fixes over the previous version:
 *  - The pool-slot bound is now checked BEFORE indexing _pools and calling
 *    CreatePools; previously the RT_ASSERT compiled out in release builds and
 *    an out-of-range slot was written before the check ran.
 *  - The frame slot is derived from g_gpu.current_frame_id instead of being
 *    hard-coded to 0; with 0, rtResetCommandPools could reset a pool whose
 *    buffers were still in flight. */
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
                                                  const rt_alloc_command_buffer_info *info,
                                                  rt_command_buffer_handle *p_command_buffers) {
    if (t_first_pool == 0) {
        /* First allocation on this thread: acquire a pool-set slot. */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
        if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
            return RT_OUT_OF_MEMORY;
        rt_result create_res = CreatePools(&_pools[t_first_pool]);
        if (create_res != RT_SUCCESS)
            return create_res;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return RT_OUT_OF_MEMORY;
    rt_thread_pools *pools = &_pools[t_first_pool];

    /* Allocate out of the pools belonging to the frame currently recorded. */
    uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;

    rt_result result = RT_SUCCESS;
    /* TODO: We should probably batch allocations of the same type */
    uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
    /* Reserve `count` consecutive ring slots; the counter wraps on its own. */
    uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t slot = (start + i) % mod;
        /* Bump the slot version so stale handles are rejected; version 0 is
         * reserved as "never used". */
        _command_buffers[slot].version =
            (_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        if (_command_buffers[slot].version == 0)
            _command_buffers[slot].version = 1;

        VkCommandPool pool = pools->graphics_pools[frame_id];
        if (info[i].target_queue == RT_COMPUTE_QUEUE)
            pool = pools->compute_pools[frame_id];
        else if (info[i].target_queue == RT_TRANSFER_QUEUE)
            pool = pools->transfer_pools[frame_id];
        _command_buffers[slot].target_queue = info[i].target_queue;

        VkCommandBufferAllocateInfo alloc_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            .commandBufferCount = 1,
            .commandPool = pool,
        };
        if (vkAllocateCommandBuffers(g_gpu.device,
                                     &alloc_info,
                                     &_command_buffers[slot].command_buffer) != VK_SUCCESS) {
            result = RT_UNKNOWN_ERROR;
            break;
        }
        p_command_buffers[i].index = slot;
        p_command_buffers[i].version = _command_buffers[slot].version;
    }
    return result;
}
/* Submits the command buffers named by info->command_buffers to `queue`.
 * Every handle is validated (version match against the ring-buffer slot and
 * target-queue match against `queue`) before anything is submitted; a single
 * bad handle fails the whole call with RT_INVALID_VALUE. Semaphores are not
 * wired up yet (see TODO). A temporary arena provides the scratch array of
 * VkCommandBuffer handles. */
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
                                                   const rt_submit_command_buffers_info *info) {
    const uint32_t count = info->command_buffer_count;
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    rt_result result = RT_SUCCESS;
    VkQueue target_queue = rtGetQueue(queue);

    VkCommandBuffer *vk_buffers = RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBuffer, count);
    if (!vk_buffers) {
        result = RT_OUT_OF_MEMORY;
        goto done;
    }

    /* Translate handles into VkCommandBuffers, validating as we go. */
    for (uint32_t i = 0; i < count; ++i) {
        rt_command_buffer_handle handle = info->command_buffers[i];
        rt_command_buffer *cmdbuf = &_command_buffers[handle.index];
        if (cmdbuf->version != handle.version) {
            rtLog("vk",
                  "Mismatch between handle version and stored version while submitting a command "
                  "buffer");
            result = RT_INVALID_VALUE;
            goto done;
        }
        if (cmdbuf->target_queue != queue) {
            rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
            result = RT_INVALID_VALUE;
            goto done;
        }
        vk_buffers[i] = cmdbuf->command_buffer;
    }

    /* TODO(Kevin): Retrieve semaphores */
    VkSubmitInfo submit_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pCommandBuffers = vk_buffers,
        .commandBufferCount = count,
        .pWaitSemaphores = NULL,
        .pWaitDstStageMask = NULL,
        .waitSemaphoreCount = 0,
        .pSignalSemaphores = NULL,
        .signalSemaphoreCount = 0,
    };
    if (vkQueueSubmit(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
        rtLog("vk", "vkQueueSubmit failed.");
        result = RT_UNKNOWN_ERROR;
    }

done:
    rtReturnTemporaryArena(temp);
    return result;
}

View File

@ -3,4 +3,6 @@
#include "runtime/runtime.h" #include "runtime/runtime.h"
void rtResetCommandPools(unsigned int frame_id);
#endif #endif

9
src/renderer/vk/frame.c Normal file
View File

@ -0,0 +1,9 @@
#include "gpu.h"
#include "command_buffers.h"
#include "runtime/renderer_api.h"
/* Marks the start of a render frame: records the frame id in the global gpu
 * state (read by other backend code, e.g. command-buffer allocation) and
 * recycles the command pools of the frame slot that frame_id maps to. */
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
    g_gpu.current_frame_id = frame_id;
    rtResetCommandPools(frame_id);
}

View File

@ -9,6 +9,13 @@
#include "runtime/renderer_api.h" #include "runtime/renderer_api.h"
/* Minimum supported value of g_gpu.max_frames_in_flight */
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2
/* Maximum supported number of frames in flight.
* The actually configured value is contained in g_gpu. */
#define RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT 3
#ifdef _WIN32 #ifdef _WIN32
struct HINSTANCE__; struct HINSTANCE__;
struct HWND__; struct HWND__;
@ -36,9 +43,11 @@ typedef struct {
VkQueue graphics_queue; VkQueue graphics_queue;
VkQueue compute_queue; VkQueue compute_queue;
VkQueue present_queue; VkQueue present_queue;
VkQueue transfer_queue;
uint32_t graphics_family; uint32_t graphics_family;
uint32_t compute_family; uint32_t compute_family;
uint32_t present_family; uint32_t present_family;
uint32_t transfer_family;
rt_native_window native_window; rt_native_window native_window;
@ -48,6 +57,9 @@ typedef struct {
VkPhysicalDeviceFeatures phys_device_features; VkPhysicalDeviceFeatures phys_device_features;
VmaAllocator allocator; VmaAllocator allocator;
unsigned int max_frames_in_flight;
unsigned int current_frame_id;
} rt_vk_gpu; } rt_vk_gpu;
#ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL #ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL
@ -60,4 +72,6 @@ VkFormat rtPixelFormatToVkFormat(rt_pixel_format format);
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count); VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);
VkQueue rtGetQueue(rt_gpu_queue queue);
#endif #endif

View File

@ -40,4 +40,17 @@ VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count) {
break; break;
} }
return (VkSampleCountFlagBits)count; return (VkSampleCountFlagBits)count;
}
/* Maps an rt_gpu_queue identifier to the VkQueue handle retrieved at device
 * creation. Returns VK_NULL_HANDLE for any unrecognized value. */
VkQueue rtGetQueue(rt_gpu_queue queue) {
    if (queue == RT_GRAPHICS_QUEUE)
        return g_gpu.graphics_queue;
    if (queue == RT_COMPUTE_QUEUE)
        return g_gpu.compute_queue;
    if (queue == RT_TRANSFER_QUEUE)
        return g_gpu.transfer_queue;
    return VK_NULL_HANDLE;
}

View File

@ -19,6 +19,8 @@ RT_CVAR_I(r_VkEnableAPIAllocTracking,
RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", ""); RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");
RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);
rt_vk_gpu g_gpu; rt_vk_gpu g_gpu;
static VkAllocationCallbacks _tracking_alloc_cbs; static VkAllocationCallbacks _tracking_alloc_cbs;
@ -82,12 +84,15 @@ DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
extern rt_cvar r_VkPreferredSwapchainImages; extern rt_cvar r_VkPreferredSwapchainImages;
extern rt_cvar r_VkPreferMailboxMode; extern rt_cvar r_VkPreferMailboxMode;
extern rt_cvar r_VkMaxPipelineCount;
void RT_RENDERER_API_FN(RegisterCVars)(void) { void RT_RENDERER_API_FN(RegisterCVars)(void) {
rtRegisterCVAR(&r_VkEnableAPIAllocTracking); rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
rtRegisterCVAR(&r_VkPhysDeviceName); rtRegisterCVAR(&r_VkPhysDeviceName);
rtRegisterCVAR(&r_VkPreferredSwapchainImages); rtRegisterCVAR(&r_VkPreferredSwapchainImages);
rtRegisterCVAR(&r_VkPreferMailboxMode); rtRegisterCVAR(&r_VkPreferMailboxMode);
rtRegisterCVAR(&r_VkMaxFramesInFlight);
rtRegisterCVAR(&r_VkMaxPipelineCount);
} }
static rt_result CreateInstance(void) { static rt_result CreateInstance(void) {
@ -211,12 +216,14 @@ typedef struct {
uint32_t graphics; uint32_t graphics;
uint32_t compute; uint32_t compute;
uint32_t present; uint32_t present;
uint32_t transfer;
} rt_queue_indices; } rt_queue_indices;
static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) { static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
rt_queue_indices indices = {.graphics = UINT32_MAX, rt_queue_indices indices = {.graphics = UINT32_MAX,
.compute = UINT32_MAX, .compute = UINT32_MAX,
.present = UINT32_MAX}; .present = UINT32_MAX,
.transfer = UINT32_MAX};
uint32_t count = 0; uint32_t count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL); vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
@ -232,12 +239,20 @@ static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfac
indices.graphics = i; indices.graphics = i;
if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
indices.compute = i; indices.compute = i;
if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
indices.transfer = i;
VkBool32 present_supported = VK_FALSE; VkBool32 present_supported = VK_FALSE;
vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported); vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
if (present_supported) if (present_supported)
indices.present = i; indices.present = i;
} }
if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
indices.transfer = indices.graphics;
else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
indices.transfer = indices.compute;
free(props); free(props);
return indices; return indices;
} }
@ -405,11 +420,12 @@ static rt_result CreateDevice(void) {
g_gpu.compute_family = queue_indices.compute; g_gpu.compute_family = queue_indices.compute;
g_gpu.graphics_family = queue_indices.graphics; g_gpu.graphics_family = queue_indices.graphics;
g_gpu.present_family = queue_indices.present; g_gpu.present_family = queue_indices.present;
g_gpu.transfer_family = queue_indices.transfer;
float priority = 1.f; float priority = 1.f;
uint32_t distinct_queue_count = 1; uint32_t distinct_queue_count = 1;
VkDeviceQueueCreateInfo queue_info[3]; VkDeviceQueueCreateInfo queue_info[4];
queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[0].pNext = NULL; queue_info[0].pNext = NULL;
queue_info[0].flags = 0; queue_info[0].flags = 0;
@ -433,6 +449,17 @@ static rt_result CreateDevice(void) {
queue_info[distinct_queue_count].queueCount = 1; queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present; queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
queue_info[distinct_queue_count].pQueuePriorities = &priority; queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
}
if (queue_indices.transfer != queue_indices.graphics &&
queue_indices.transfer != queue_indices.compute) {
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[distinct_queue_count].pNext = NULL;
queue_info[distinct_queue_count].flags = 0;
queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
} }
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = { VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
@ -463,6 +490,7 @@ static rt_result CreateDevice(void) {
vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue); vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue); vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue); vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.transfer, 0, &g_gpu.transfer_queue);
return RT_SUCCESS; return RT_SUCCESS;
} }
@ -519,6 +547,8 @@ extern rt_result InitPipelineManagement(void);
extern void ShutdownPipelineManagement(void); extern void ShutdownPipelineManagement(void);
extern rt_result InitRenderTargetManagement(void); extern rt_result InitRenderTargetManagement(void);
extern void ShutdownRenderTargetManagement(void); extern void ShutdownRenderTargetManagement(void);
extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
rtLog("vk", "Init"); rtLog("vk", "Init");
@ -533,6 +563,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
} else { } else {
g_gpu.alloc_cb = NULL; g_gpu.alloc_cb = NULL;
} }
g_gpu.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i,
RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT,
RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT);
int res = CreateInstance(); int res = CreateInstance();
if (res != RT_SUCCESS) if (res != RT_SUCCESS)
@ -553,6 +586,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
if (res != RT_SUCCESS) if (res != RT_SUCCESS)
return res; return res;
res = InitRenderTargetManagement(); res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitCommandBufferManagement();
if (res != RT_SUCCESS) if (res != RT_SUCCESS)
return res; return res;
res = rtCreateSwapchain(); res = rtCreateSwapchain();
@ -566,6 +602,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
rtLog("vk", "Shutdown"); rtLog("vk", "Shutdown");
vkDeviceWaitIdle(g_gpu.device); vkDeviceWaitIdle(g_gpu.device);
rtDestroySwapchain(); rtDestroySwapchain();
ShutdownCommandBufferManagement();
ShutdownRenderTargetManagement(); ShutdownRenderTargetManagement();
ShutdownPipelineManagement(); ShutdownPipelineManagement();
DestroyAllocator(); DestroyAllocator();

View File

@ -16,6 +16,7 @@ if vk_dep.found()
'swapchain.h', 'swapchain.h',
'command_buffers.c', 'command_buffers.c',
'frame.c',
'helper.c', 'helper.c',
'init.c', 'init.c',
'pipelines.c', 'pipelines.c',

View File

@ -121,12 +121,7 @@ rt_result rtCreateSwapchain(void) {
return 50; return 50;
} }
g_swapchain.format = device_params.surface_format.format; g_swapchain.format = device_params.surface_format.format;
g_swapchain.extent = g_swapchain.extent = device_params.extent;
device_params.extent;
/* Retrieve images */ /* Retrieve images */
g_swapchain.image_count = 0; g_swapchain.image_count = 0;

29
src/runtime/atomics.h Normal file
View File

@ -0,0 +1,29 @@
#ifndef RT_ATOMICS_H
#define RT_ATOMICS_H

/* Macros & helpers for atomic instructions.
 * All operations are sequentially consistent.
 * Inc/Dec return the NEW value; FetchAdd returns the PREVIOUS value. */

#ifdef _MSC_VER
/* Required for the _Interlocked* intrinsics and LONG/LONG64. */
#include <intrin.h>
/* Increment and decrement return the new value */
#define rtAtomic32Inc(pa) _InterlockedIncrement((volatile LONG *)(pa))
#define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile LONG64 *)(pa))
#define rtAtomic32Dec(pa) _InterlockedDecrement((volatile LONG *)(pa))
#define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile LONG64 *)(pa))
#define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile LONG *)(pa), (LONG)(value))
/* BUGFIX: the addend must be cast to LONG64, not LONG — the old (LONG) cast
 * truncated 64-bit addends to their low 32 bits. */
#define rtAtomic64FetchAdd(pa, value)                                                              \
    _InterlockedExchangeAdd64((volatile LONG64 *)(pa), (LONG64)(value))
#elif defined(__GNUC__) || defined(__clang__)
#define rtAtomic32Inc(pa) __atomic_add_fetch((pa), 1, __ATOMIC_SEQ_CST)
#define rtAtomic64Inc(pa) __atomic_add_fetch((pa), 1LL, __ATOMIC_SEQ_CST)
#define rtAtomic32Dec(pa) __atomic_sub_fetch((pa), 1, __ATOMIC_SEQ_CST)
#define rtAtomic64Dec(pa) __atomic_sub_fetch((pa), 1LL, __ATOMIC_SEQ_CST)
#define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
/* BUGFIX: was "_-atomic_fetch_add", a typo that cannot compile. */
#define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
#endif

#endif

View File

@ -61,6 +61,8 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info);
RT_DLLEXPORT void rtShutdownGFX(void); RT_DLLEXPORT void rtShutdownGFX(void);
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);
/* ********************************************************************* /* *********************************************************************
* Framegraph API * Framegraph API
* *

View File

@ -25,17 +25,17 @@ RT_CVAR_S(rt_Renderer, "Select the render backend. Available options: [vk], Defa
extern void RT_RENDERER_API_FN(RegisterCVars)(void); extern void RT_RENDERER_API_FN(RegisterCVars)(void);
extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *); extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
extern void RT_RENDERER_API_FN(Shutdown)(void); extern void RT_RENDERER_API_FN(Shutdown)(void);
extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *); extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle); extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
extern rt_render_target_handle extern rt_render_target_handle
RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *); RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *);
extern void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle); extern void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle);
extern rt_result extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, const rt_alloc_command_buffer_info *,
rt_render_command_buffer_handle *p_command_buffers, rt_command_buffer_handle *);
rt_gpu_semaphore_handle *p_semaphores); extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
extern rt_result RT_RENDERER_API_FN( const rt_submit_command_buffers_info *);
SubmitCommandBuffers)(uint32_t count, const rt_render_command_buffer_handle *command_buffers);
#endif #endif
extern rt_result InitFramegraphManager(void); extern rt_result InitFramegraphManager(void);
@ -62,6 +62,7 @@ static bool LoadRenderer(void) {
RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn); RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
RETRIEVE_SYMBOL(Init, rt_init_renderer_fn); RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn); RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn); RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn); RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
RETRIEVE_SYMBOL(CreateRenderTarget, rt_create_render_target_fn); RETRIEVE_SYMBOL(CreateRenderTarget, rt_create_render_target_fn);
@ -80,6 +81,7 @@ static bool LoadRenderer(void) {
g_renderer.RegisterCVars = &rtRenRegisterCVars; g_renderer.RegisterCVars = &rtRenRegisterCVars;
g_renderer.Init = &rtRenInit; g_renderer.Init = &rtRenInit;
g_renderer.Shutdown = &rtRenShutdown; g_renderer.Shutdown = &rtRenShutdown;
g_renderer.BeginFrame = &rtRenBeginFrame;
g_renderer.CompilePipeline = &rtRenCompilePipeline; g_renderer.CompilePipeline = &rtRenCompilePipeline;
g_renderer.DestroyPipeline = &rtRenDestroyPipeline; g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
g_renderer.CreateRenderTarget = &rtRenCreateRenderTarget; g_renderer.CreateRenderTarget = &rtRenCreateRenderTarget;
@ -121,3 +123,7 @@ RT_DLLEXPORT void rtShutdownGFX(void) {
ShutdownFramegraphManager(); ShutdownFramegraphManager();
g_renderer.Shutdown(); g_renderer.Shutdown();
} }
/* Forwards the begin-of-frame notification to the loaded renderer backend
 * via its function table. Called once per frame by the render thread. */
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
    g_renderer.BeginFrame(frame_id);
}

View File

@ -2,6 +2,7 @@
#include "main_loop.h" #include "main_loop.h"
#include "runtime.h" #include "runtime.h"
#include "config.h" #include "config.h"
#include "gfx.h"
RT_CVAR_I(rt_MaxFrameLatency, "Maximum latency between update and rendering. Default: 2", 2); RT_CVAR_I(rt_MaxFrameLatency, "Maximum latency between update and rendering. Default: 2", 2);
@ -16,11 +17,11 @@ void UpdateThreadEntry(void *param) {
while (!g_main_loop.shutdown) { while (!g_main_loop.shutdown) {
/* Wait until the render thread has catched up */ /* Wait until the render thread has catched up */
rtWaitOnSemaphore(&g_main_loop.update_proceed); rtWaitOnSemaphore(&g_main_loop.update_proceed);
rtLog("UT", "Processing %d", g_main_loop.u_frame_id); rtLog("UT", "Processing %u", g_main_loop.u_frame_id);
(g_main_loop.GameUpdate)(); (g_main_loop.GameUpdate)();
rtLog("UT", "Finished %d", g_main_loop.u_frame_id); rtLog("UT", "Finished %u", g_main_loop.u_frame_id);
g_main_loop.u_frame_id += 1; g_main_loop.u_frame_id += 1;
/* Signal the render thread that data is available */ /* Signal the render thread that data is available */
rtSignalSemaphore(&g_main_loop.render_proceed); rtSignalSemaphore(&g_main_loop.render_proceed);
@ -35,11 +36,12 @@ void RenderThreadEntry(void *param) {
rtLog("RT", "RenderThread Entry"); rtLog("RT", "RenderThread Entry");
while (!g_main_loop.shutdown) { while (!g_main_loop.shutdown) {
rtWaitOnSemaphore(&g_main_loop.render_proceed); rtWaitOnSemaphore(&g_main_loop.render_proceed);
rtLog("RT", "Processing %d", g_main_loop.r_frame_id); rtLog("RT", "Processing %u", g_main_loop.r_frame_id);
rtBeginGFXFrame(g_main_loop.r_frame_id);
(g_main_loop.GameRender)(); (g_main_loop.GameRender)();
rtLog("RT", "Finished %d", g_main_loop.r_frame_id); rtLog("RT", "Finished %u", g_main_loop.r_frame_id);
g_main_loop.r_frame_id += 1; g_main_loop.r_frame_id += 1;
/* Signal the update thread that we have finished and it can proceed */ /* Signal the update thread that we have finished and it can proceed */
rtSignalSemaphore(&g_main_loop.update_proceed); rtSignalSemaphore(&g_main_loop.update_proceed);

View File

@ -8,8 +8,8 @@ typedef void rt_main_loop_update_fn(void);
typedef void rt_main_loop_render_fn(void); typedef void rt_main_loop_render_fn(void);
typedef struct { typedef struct {
int u_frame_id; unsigned int u_frame_id;
int r_frame_id; unsigned int r_frame_id;
rt_semaphore update_proceed; rt_semaphore update_proceed;
rt_semaphore render_proceed; rt_semaphore render_proceed;

View File

@ -5,6 +5,7 @@ runtime_lib = library('rt',
# Project Sources # Project Sources
'aio.h', 'aio.h',
'app.h', 'app.h',
'atomics.h',
'buffer_manager.h', 'buffer_manager.h',
'compression.h', 'compression.h',
'config.h', 'config.h',

View File

@ -13,6 +13,25 @@
extern "C" { extern "C" {
#endif #endif
/* Handles for backend objects */
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255
#define RT_RENDER_BACKEND_HANDLE(name) \
typedef struct { \
uint32_t version : 8; \
uint32_t index : 24; \
} name
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
#undef RT_RENDER_BACKEND_HANDLE
/* Init data for the renderer */
#ifdef _WIN32 #ifdef _WIN32
struct HINSTANCE__; struct HINSTANCE__;
struct HWND__; struct HWND__;
@ -30,6 +49,14 @@ struct rt_renderer_init_info_s {
#endif #endif
}; };
/* Argument types for render commands */
typedef enum {
RT_GRAPHICS_QUEUE,
RT_COMPUTE_QUEUE,
RT_TRANSFER_QUEUE,
} rt_gpu_queue;
typedef struct { typedef struct {
rt_resource_id vertex_shader; rt_resource_id vertex_shader;
rt_resource_id fragment_shader; rt_resource_id fragment_shader;
@ -86,41 +113,39 @@ typedef struct {
size_t bytecode_length; size_t bytecode_length;
} rt_shader_info; } rt_shader_info;
/* Handles for backend objects */ typedef struct {
rt_gpu_queue target_queue;
} rt_alloc_command_buffer_info;
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255 typedef struct {
const rt_command_buffer_handle *command_buffers;
const rt_gpu_semaphore_handle *wait_semaphores;
const rt_gpu_semaphore_handle *signal_semaphores;
uint32_t command_buffer_count;
uint32_t wait_semaphore_count;
uint32_t signal_semaphore_count;
} rt_submit_command_buffers_info;
#define RT_RENDER_BACKEND_HANDLE(name) \ /* Renderer API */
typedef struct { \
uint32_t version : 8; \
uint32_t index : 24; \
} name
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
#undef RT_RENDER_BACKEND_HANDLE
typedef void rt_register_renderer_cvars_fn(void); typedef void rt_register_renderer_cvars_fn(void);
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info); typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
typedef void rt_shutdown_renderer_fn(void); typedef void rt_shutdown_renderer_fn(void);
typedef void rt_begin_frame_fn(unsigned int frame_id);
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info); typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle); typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
typedef rt_render_target_handle rt_create_render_target_fn(const rt_render_target_info *info); typedef rt_render_target_handle rt_create_render_target_fn(const rt_render_target_info *info);
typedef void rt_destroy_render_target_fn(rt_render_target_handle handle); typedef void rt_destroy_render_target_fn(rt_render_target_handle handle);
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count, typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
rt_render_command_buffer_handle *p_command_buffers, const rt_alloc_command_buffer_info *info,
rt_gpu_semaphore_handle *p_semaphores); rt_command_buffer_handle *p_command_buffers);
typedef rt_result typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, const rt_submit_command_buffers_info *info);
rt_submit_command_buffers_fn(uint32_t count,
const rt_render_command_buffer_handle *command_buffers);
typedef struct { typedef struct {
rt_register_renderer_cvars_fn *RegisterCVars; rt_register_renderer_cvars_fn *RegisterCVars;
rt_init_renderer_fn *Init; rt_init_renderer_fn *Init;
rt_shutdown_renderer_fn *Shutdown; rt_shutdown_renderer_fn *Shutdown;
rt_begin_frame_fn *BeginFrame;
rt_compile_pipeline_fn *CompilePipeline; rt_compile_pipeline_fn *CompilePipeline;
rt_destroy_pipeline_fn *DestroyPipeline; rt_destroy_pipeline_fn *DestroyPipeline;
rt_create_render_target_fn *CreateRenderTarget; rt_create_render_target_fn *CreateRenderTarget;

View File

@ -28,6 +28,8 @@ extern "C" {
#define RT_UNUSED(x) ((void)sizeof((x))) #define RT_UNUSED(x) ((void)sizeof((x)))
#define RT_ARRAY_COUNT(x) (sizeof((x)) / sizeof((x)[0])) #define RT_ARRAY_COUNT(x) (sizeof((x)) / sizeof((x)[0]))
#define RT_RESTRICT_VALUE_TO_BOUNDS(v, lower, upper) (((v) < (lower)) ? (lower) : (((v) > (upper)) ? (upper) : (v)))
#define RT_KB(n) ((n)*1024U) #define RT_KB(n) ((n)*1024U)
#define RT_MB(n) ((n)*1024U * 1024U) #define RT_MB(n) ((n)*1024U * 1024U)
#define RT_GB(n) ((n)*1024U * 1024U * 1024U) #define RT_GB(n) ((n)*1024U * 1024U * 1024U)