Manage command buffers
I decided to make queues explicit, to simplify handling queue ownership transfers in the renderer code. The framegraph + pass code has explicit knowledge about resource ownership, so it makes sense to handle it there. - Manage pools - Allocate command buffers - Submit command buffers
This commit is contained in:
parent
b44b3e651b
commit
887fa63c43
@ -1,14 +1,284 @@
|
||||
#include "gpu.h"
|
||||
#include "swapchain.h"
|
||||
|
||||
#include "runtime/atomics.h"
|
||||
#include "runtime/config.h"
|
||||
#include "runtime/mem_arena.h"
|
||||
#include "runtime/renderer_api.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
rt_result
|
||||
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
rt_render_command_buffer_handle *p_command_buffers,
|
||||
rt_gpu_semaphore_handle *p_semaphores) {
|
||||
#include <stdlib.h>
|
||||
|
||||
RT_CVAR_I(rt_VkMaxCommandPools,
|
||||
"Maximum number of command pools that can be created. Default: 32",
|
||||
32);
|
||||
RT_CVAR_I(
|
||||
rt_VkCommandBufferRingBufferSize,
|
||||
"Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
|
||||
512);
|
||||
|
||||
typedef struct {
|
||||
VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
|
||||
uint32_t distinct_pool_count;
|
||||
|
||||
VkCommandPool *compute_pools;
|
||||
VkCommandPool *graphics_pools;
|
||||
VkCommandPool *transfer_pools;
|
||||
} rt_thread_pools;
|
||||
|
||||
typedef struct {
|
||||
VkCommandBuffer command_buffer;
|
||||
uint32_t version;
|
||||
rt_gpu_queue target_queue;
|
||||
} rt_command_buffer;
|
||||
|
||||
static rt_thread_pools *_pools;
|
||||
static uint32_t _next_pools;
|
||||
static RT_THREAD_LOCAL unsigned int t_first_pool;
|
||||
|
||||
static rt_command_buffer *_command_buffers;
|
||||
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
|
||||
* index. */
|
||||
static uint32_t _next_command_buffer;
|
||||
|
||||
rt_result InitCommandBufferManagement(void) {
|
||||
_pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
|
||||
if (!_pools)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
|
||||
_command_buffers =
|
||||
calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
|
||||
if (!_command_buffers) {
|
||||
free(_pools);
|
||||
return RT_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
/* Keep 0 free as a "Not initialized" value for t_first_pool */
|
||||
_next_pools = 1;
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
rt_result
|
||||
RT_RENDERER_API_FN(SubmitCommandBuffers)(uint32_t count,
|
||||
const rt_render_command_buffer_handle *command_buffers) {
|
||||
/* Destroys every distinct VkCommandPool owned by *pools.
 * Only the Vulkan pools are destroyed here; the global _pools array is owned
 * by Init/ShutdownCommandBufferManagement. The previous version called
 * free(_pools) here, which double-freed the global array when this function
 * runs once per slot during shutdown, and freed storage that CreatePools'
 * error path still dereferences. */
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
    /* Reset the count so a retried CreatePools starts from a clean state. */
    pools->distinct_pool_count = 0;
}
|
||||
|
||||
void ShutdownCommandBufferManagement(void) {
|
||||
/* _next_pools is the number of existing pools */
|
||||
for (uint32_t i = 1; i < _next_pools; ++i) {
|
||||
DestroyPools(&_pools[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void rtResetCommandPools(unsigned int frame_id) {
|
||||
unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
|
||||
for (uint32_t i = 1; i < _next_pools; ++i) {
|
||||
if (vkResetCommandPool(g_gpu.device,
|
||||
_pools[i].graphics_pools[pool_idx],
|
||||
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
|
||||
}
|
||||
if (_pools[i].compute_pools != _pools[i].graphics_pools) {
|
||||
if (vkResetCommandPool(g_gpu.device,
|
||||
_pools[i].compute_pools[pool_idx],
|
||||
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
|
||||
}
|
||||
}
|
||||
if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
|
||||
_pools[i].transfer_pools != _pools[i].compute_pools) {
|
||||
if (vkResetCommandPool(g_gpu.device,
|
||||
_pools[i].transfer_pools[pool_idx],
|
||||
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static rt_result CreatePools(rt_thread_pools *pools) {
|
||||
/* Graphics pools */
|
||||
pools->graphics_pools = pools->pools;
|
||||
pools->distinct_pool_count = 0;
|
||||
VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = g_gpu.graphics_family,
|
||||
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
|
||||
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
|
||||
if (vkCreateCommandPool(g_gpu.device,
|
||||
&graphics_info,
|
||||
g_gpu.alloc_cb,
|
||||
&pools->graphics_pools[i]) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to create a graphics command pool.");
|
||||
DestroyPools(pools);
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
++pools->distinct_pool_count;
|
||||
}
|
||||
|
||||
if (g_gpu.compute_family != g_gpu.graphics_family) {
|
||||
VkCommandPoolCreateInfo compute_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = g_gpu.compute_family,
|
||||
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
|
||||
};
|
||||
pools->compute_pools = &pools->pools[pools->distinct_pool_count];
|
||||
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
|
||||
if (vkCreateCommandPool(g_gpu.device,
|
||||
&compute_info,
|
||||
g_gpu.alloc_cb,
|
||||
&pools->compute_pools[i]) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to create a compute command pool.");
|
||||
DestroyPools(pools);
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
++pools->distinct_pool_count;
|
||||
}
|
||||
} else {
|
||||
pools->compute_pools = pools->graphics_pools;
|
||||
}
|
||||
|
||||
if (g_gpu.transfer_family != g_gpu.graphics_family &&
|
||||
g_gpu.transfer_family != g_gpu.compute_family) {
|
||||
VkCommandPoolCreateInfo transfer_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = g_gpu.transfer_family,
|
||||
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
|
||||
};
|
||||
pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
|
||||
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
|
||||
if (vkCreateCommandPool(g_gpu.device,
|
||||
&transfer_info,
|
||||
g_gpu.alloc_cb,
|
||||
&pools->transfer_pools[i]) != VK_SUCCESS) {
|
||||
rtLog("vk", "Failed to create a transfer command pool.");
|
||||
DestroyPools(pools);
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
++pools->distinct_pool_count;
|
||||
}
|
||||
} else if (g_gpu.transfer_family == g_gpu.graphics_family) {
|
||||
pools->transfer_pools = pools->graphics_pools;
|
||||
} else if (g_gpu.transfer_family == g_gpu.compute_family) {
|
||||
pools->transfer_pools = pools->compute_pools;
|
||||
}
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
const rt_alloc_command_buffer_info *info,
|
||||
rt_command_buffer_handle *p_command_buffers) {
|
||||
rt_thread_pools *pools = &_pools[t_first_pool];
|
||||
if (t_first_pool == 0) {
|
||||
/* Acquire pools */
|
||||
t_first_pool = rtAtomic32Inc(&_next_pools);
|
||||
RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
|
||||
|
||||
pools = &_pools[t_first_pool];
|
||||
rt_result create_res = CreatePools(pools);
|
||||
if (create_res != RT_SUCCESS)
|
||||
return create_res;
|
||||
}
|
||||
if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
|
||||
uint32_t frame_id = 0;
|
||||
rt_result result = RT_SUCCESS;
|
||||
|
||||
/* TODO: We should probably batch allocations of the same type */
|
||||
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
|
||||
uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
uint32_t slot = (start + i) % mod;
|
||||
_command_buffers[slot].version =
|
||||
(_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
|
||||
if (_command_buffers[slot].version == 0)
|
||||
_command_buffers[slot].version = 1;
|
||||
|
||||
VkCommandPool pool = pools->graphics_pools[frame_id];
|
||||
if (info[i].target_queue == RT_COMPUTE_QUEUE)
|
||||
pool = pools->compute_pools[frame_id];
|
||||
else if (info[i].target_queue == RT_TRANSFER_QUEUE)
|
||||
pool = pools->transfer_pools[frame_id];
|
||||
_command_buffers[slot].target_queue = info[i].target_queue;
|
||||
|
||||
VkCommandBufferAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
.commandPool = pool,
|
||||
};
|
||||
if (vkAllocateCommandBuffers(g_gpu.device,
|
||||
&alloc_info,
|
||||
&_command_buffers[slot].command_buffer) != VK_SUCCESS) {
|
||||
result = RT_UNKNOWN_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
p_command_buffers[i].index = slot;
|
||||
p_command_buffers[i].version = _command_buffers[slot].version;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Submits the command buffers described by info to the given queue.
 * Validates each handle (version must match the stored slot version, and the
 * buffer's recorded target queue must match `queue`) before building a single
 * VkSubmitInfo batch. Returns RT_INVALID_VALUE on a stale or mismatched
 * handle, RT_OUT_OF_MEMORY if temporary storage is unavailable, or
 * RT_UNKNOWN_ERROR if vkQueueSubmit fails. Semaphore wiring is not yet
 * implemented (see TODO below). */
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
                                                   const rt_submit_command_buffers_info *info) {

    uint32_t count = info->command_buffer_count;
    /* Temporary arena scopes the VkCommandBuffer scratch array; released at `out`. */
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    rt_result result = RT_SUCCESS;
    VkQueue target_queue = rtGetQueue(queue);

    VkCommandBuffer *command_buffers = RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBuffer, count);
    if (!command_buffers) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }

    /* Resolve every handle to its VkCommandBuffer, rejecting stale handles. */
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t slot = info->command_buffers[i].index;
        /* Version mismatch means the ring slot was recycled since allocation. */
        if (_command_buffers[slot].version != info->command_buffers[i].version) {
            rtLog("vk",
                  "Mismatch between handle version and stored version while submitting a command "
                  "buffer");
            result = RT_INVALID_VALUE;
            goto out;
        }
        /* A buffer may only be submitted to the queue it was allocated for. */
        if (_command_buffers[slot].target_queue != queue) {
            rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
            result = RT_INVALID_VALUE;
            goto out;
        }
        command_buffers[i] = _command_buffers[slot].command_buffer;
    }

    /* TODO(Kevin): Retrieve semaphores */
    VkSemaphore *wait_semaphores = NULL;
    VkSemaphore *signal_semaphores = NULL;
    uint32_t wait_count = 0;
    uint32_t signal_count = 0;

    /* NOTE(review): pWaitDstStageMask must be non-NULL once wait_count > 0 --
     * confirm when semaphore retrieval is implemented. */
    VkSubmitInfo submit_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pCommandBuffers = command_buffers,
        .commandBufferCount = count,
        .pWaitSemaphores = wait_semaphores,
        .pWaitDstStageMask = NULL,
        .waitSemaphoreCount = wait_count,
        .pSignalSemaphores = signal_semaphores,
        .signalSemaphoreCount = signal_count,
    };

    if (vkQueueSubmit(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
        rtLog("vk", "vkQueueSubmit failed.");
        result = RT_UNKNOWN_ERROR;
    }

out:
    rtReturnTemporaryArena(temp);
    return result;
}
|
@ -3,4 +3,6 @@
|
||||
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
void rtResetCommandPools(unsigned int frame_id);
|
||||
|
||||
#endif
|
||||
|
9
src/renderer/vk/frame.c
Normal file
9
src/renderer/vk/frame.c
Normal file
@ -0,0 +1,9 @@
|
||||
#include "gpu.h"
|
||||
#include "command_buffers.h"
|
||||
|
||||
#include "runtime/renderer_api.h"
|
||||
|
||||
/* Marks the start of a new frame on the render thread: records the frame id
 * for the backend and recycles the command pools belonging to this frame's
 * slot (frame_id modulo max_frames_in_flight inside rtResetCommandPools). */
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
    g_gpu.current_frame_id = frame_id;
    rtResetCommandPools(frame_id);
}
|
@ -9,6 +9,13 @@
|
||||
|
||||
#include "runtime/renderer_api.h"
|
||||
|
||||
/* Minimum supported value of g_gpu.max_frames_in_flight */
|
||||
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2
|
||||
|
||||
/* Maximum supported number of frames in flight.
|
||||
* The actually configured value is contained in g_gpu. */
|
||||
#define RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT 3
|
||||
|
||||
#ifdef _WIN32
|
||||
struct HINSTANCE__;
|
||||
struct HWND__;
|
||||
@ -36,9 +43,11 @@ typedef struct {
|
||||
VkQueue graphics_queue;
|
||||
VkQueue compute_queue;
|
||||
VkQueue present_queue;
|
||||
VkQueue transfer_queue;
|
||||
uint32_t graphics_family;
|
||||
uint32_t compute_family;
|
||||
uint32_t present_family;
|
||||
uint32_t transfer_family;
|
||||
|
||||
rt_native_window native_window;
|
||||
|
||||
@ -48,6 +57,9 @@ typedef struct {
|
||||
VkPhysicalDeviceFeatures phys_device_features;
|
||||
|
||||
VmaAllocator allocator;
|
||||
|
||||
unsigned int max_frames_in_flight;
|
||||
unsigned int current_frame_id;
|
||||
} rt_vk_gpu;
|
||||
|
||||
#ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL
|
||||
@ -60,4 +72,6 @@ VkFormat rtPixelFormatToVkFormat(rt_pixel_format format);
|
||||
|
||||
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);
|
||||
|
||||
VkQueue rtGetQueue(rt_gpu_queue queue);
|
||||
|
||||
#endif
|
||||
|
@ -41,3 +41,16 @@ VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count) {
|
||||
}
|
||||
return (VkSampleCountFlagBits)count;
|
||||
}
|
||||
|
||||
/* Maps an abstract rt_gpu_queue id to the VkQueue retrieved at device
 * creation time. Unknown values yield VK_NULL_HANDLE. */
VkQueue rtGetQueue(rt_gpu_queue queue) {
    if (queue == RT_GRAPHICS_QUEUE)
        return g_gpu.graphics_queue;
    if (queue == RT_COMPUTE_QUEUE)
        return g_gpu.compute_queue;
    if (queue == RT_TRANSFER_QUEUE)
        return g_gpu.transfer_queue;
    return VK_NULL_HANDLE;
}
|
@ -19,6 +19,8 @@ RT_CVAR_I(r_VkEnableAPIAllocTracking,
|
||||
|
||||
RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");
|
||||
|
||||
RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);
|
||||
|
||||
rt_vk_gpu g_gpu;
|
||||
|
||||
static VkAllocationCallbacks _tracking_alloc_cbs;
|
||||
@ -82,12 +84,15 @@ DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
|
||||
|
||||
extern rt_cvar r_VkPreferredSwapchainImages;
|
||||
extern rt_cvar r_VkPreferMailboxMode;
|
||||
extern rt_cvar r_VkMaxPipelineCount;
|
||||
|
||||
void RT_RENDERER_API_FN(RegisterCVars)(void) {
|
||||
rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
|
||||
rtRegisterCVAR(&r_VkPhysDeviceName);
|
||||
rtRegisterCVAR(&r_VkPreferredSwapchainImages);
|
||||
rtRegisterCVAR(&r_VkPreferMailboxMode);
|
||||
rtRegisterCVAR(&r_VkMaxFramesInFlight);
|
||||
rtRegisterCVAR(&r_VkMaxPipelineCount);
|
||||
}
|
||||
|
||||
static rt_result CreateInstance(void) {
|
||||
@ -211,12 +216,14 @@ typedef struct {
|
||||
uint32_t graphics;
|
||||
uint32_t compute;
|
||||
uint32_t present;
|
||||
uint32_t transfer;
|
||||
} rt_queue_indices;
|
||||
|
||||
static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
|
||||
rt_queue_indices indices = {.graphics = UINT32_MAX,
|
||||
.compute = UINT32_MAX,
|
||||
.present = UINT32_MAX};
|
||||
.present = UINT32_MAX,
|
||||
.transfer = UINT32_MAX};
|
||||
|
||||
uint32_t count = 0;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
|
||||
@ -232,12 +239,20 @@ static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfac
|
||||
indices.graphics = i;
|
||||
if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
|
||||
indices.compute = i;
|
||||
if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
|
||||
indices.transfer = i;
|
||||
|
||||
VkBool32 present_supported = VK_FALSE;
|
||||
vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
|
||||
if (present_supported)
|
||||
indices.present = i;
|
||||
}
|
||||
|
||||
if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
|
||||
indices.transfer = indices.graphics;
|
||||
else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
|
||||
indices.transfer = indices.compute;
|
||||
|
||||
free(props);
|
||||
return indices;
|
||||
}
|
||||
@ -405,11 +420,12 @@ static rt_result CreateDevice(void) {
|
||||
g_gpu.compute_family = queue_indices.compute;
|
||||
g_gpu.graphics_family = queue_indices.graphics;
|
||||
g_gpu.present_family = queue_indices.present;
|
||||
g_gpu.transfer_family = queue_indices.transfer;
|
||||
|
||||
float priority = 1.f;
|
||||
|
||||
uint32_t distinct_queue_count = 1;
|
||||
VkDeviceQueueCreateInfo queue_info[3];
|
||||
VkDeviceQueueCreateInfo queue_info[4];
|
||||
queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queue_info[0].pNext = NULL;
|
||||
queue_info[0].flags = 0;
|
||||
@ -433,6 +449,17 @@ static rt_result CreateDevice(void) {
|
||||
queue_info[distinct_queue_count].queueCount = 1;
|
||||
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
|
||||
queue_info[distinct_queue_count].pQueuePriorities = &priority;
|
||||
++distinct_queue_count;
|
||||
}
|
||||
if (queue_indices.transfer != queue_indices.graphics &&
|
||||
queue_indices.transfer != queue_indices.compute) {
|
||||
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queue_info[distinct_queue_count].pNext = NULL;
|
||||
queue_info[distinct_queue_count].flags = 0;
|
||||
queue_info[distinct_queue_count].queueCount = 1;
|
||||
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
|
||||
queue_info[distinct_queue_count].pQueuePriorities = &priority;
|
||||
++distinct_queue_count;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
|
||||
@ -463,6 +490,7 @@ static rt_result CreateDevice(void) {
|
||||
vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue);
|
||||
vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue);
|
||||
vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue);
|
||||
vkGetDeviceQueue(g_gpu.device, queue_indices.transfer, 0, &g_gpu.transfer_queue);
|
||||
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
@ -519,6 +547,8 @@ extern rt_result InitPipelineManagement(void);
|
||||
extern void ShutdownPipelineManagement(void);
|
||||
extern rt_result InitRenderTargetManagement(void);
|
||||
extern void ShutdownRenderTargetManagement(void);
|
||||
extern rt_result InitCommandBufferManagement(void);
|
||||
extern void ShutdownCommandBufferManagement(void);
|
||||
|
||||
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
|
||||
rtLog("vk", "Init");
|
||||
@ -533,6 +563,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
|
||||
} else {
|
||||
g_gpu.alloc_cb = NULL;
|
||||
}
|
||||
g_gpu.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i,
|
||||
RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT,
|
||||
RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT);
|
||||
|
||||
int res = CreateInstance();
|
||||
if (res != RT_SUCCESS)
|
||||
@ -553,6 +586,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
|
||||
if (res != RT_SUCCESS)
|
||||
return res;
|
||||
res = InitRenderTargetManagement();
|
||||
if (res != RT_SUCCESS)
|
||||
return res;
|
||||
res = InitCommandBufferManagement();
|
||||
if (res != RT_SUCCESS)
|
||||
return res;
|
||||
res = rtCreateSwapchain();
|
||||
@ -566,6 +602,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
|
||||
rtLog("vk", "Shutdown");
|
||||
vkDeviceWaitIdle(g_gpu.device);
|
||||
rtDestroySwapchain();
|
||||
ShutdownCommandBufferManagement();
|
||||
ShutdownRenderTargetManagement();
|
||||
ShutdownPipelineManagement();
|
||||
DestroyAllocator();
|
||||
|
@ -16,6 +16,7 @@ if vk_dep.found()
|
||||
'swapchain.h',
|
||||
|
||||
'command_buffers.c',
|
||||
'frame.c',
|
||||
'helper.c',
|
||||
'init.c',
|
||||
'pipelines.c',
|
||||
|
@ -121,12 +121,7 @@ rt_result rtCreateSwapchain(void) {
|
||||
return 50;
|
||||
}
|
||||
g_swapchain.format = device_params.surface_format.format;
|
||||
g_swapchain.extent =
|
||||
|
||||
|
||||
|
||||
|
||||
device_params.extent;
|
||||
g_swapchain.extent = device_params.extent;
|
||||
|
||||
/* Retrieve images */
|
||||
g_swapchain.image_count = 0;
|
||||
|
29
src/runtime/atomics.h
Normal file
29
src/runtime/atomics.h
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef RT_ATOMICS_H
#define RT_ATOMICS_H

/* Macros & helpers for atomic instructions */

#ifdef _MSC_VER

/* Increment and decrement return the new value */

#define rtAtomic32Inc(pa) _InterlockedIncrement((volatile LONG *)(pa))
#define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile LONG64 *)(pa))
#define rtAtomic32Dec(pa) _InterlockedDecrement((volatile LONG *)(pa))
#define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile LONG64 *)(pa))

/* Fetch-add returns the previous value */

#define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile LONG *)(pa), (LONG)(value))
/* Fixed: the addend must be cast to LONG64, not LONG; the old cast silently
 * truncated 64-bit addends to 32 bits. */
#define rtAtomic64FetchAdd(pa, value) \
    _InterlockedExchangeAdd64((volatile LONG64 *)(pa), (LONG64)(value))

#elif defined(__GNUC__) || defined(__clang__)

#define rtAtomic32Inc(pa) __atomic_add_fetch((pa), 1, __ATOMIC_SEQ_CST)
#define rtAtomic64Inc(pa) __atomic_add_fetch((pa), 1LL, __ATOMIC_SEQ_CST)
#define rtAtomic32Dec(pa) __atomic_sub_fetch((pa), 1, __ATOMIC_SEQ_CST)
#define rtAtomic64Dec(pa) __atomic_sub_fetch((pa), 1LL, __ATOMIC_SEQ_CST)

#define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
/* Fixed typo: was "_-atomic_fetch_add", which fails to compile on first use. */
#define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)

#else
#error "Unsupported compiler: no atomic intrinsics available"
#endif

#endif
|
@ -61,6 +61,8 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info);
|
||||
|
||||
RT_DLLEXPORT void rtShutdownGFX(void);
|
||||
|
||||
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);
|
||||
|
||||
/* *********************************************************************
|
||||
* Framegraph API
|
||||
*
|
||||
|
@ -25,17 +25,17 @@ RT_CVAR_S(rt_Renderer, "Select the render backend. Available options: [vk], Defa
|
||||
extern void RT_RENDERER_API_FN(RegisterCVars)(void);
|
||||
extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
|
||||
extern void RT_RENDERER_API_FN(Shutdown)(void);
|
||||
extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
|
||||
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
|
||||
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
|
||||
extern rt_render_target_handle
|
||||
RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *);
|
||||
extern void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle);
|
||||
extern rt_result
|
||||
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
rt_render_command_buffer_handle *p_command_buffers,
|
||||
rt_gpu_semaphore_handle *p_semaphores);
|
||||
extern rt_result RT_RENDERER_API_FN(
|
||||
SubmitCommandBuffers)(uint32_t count, const rt_render_command_buffer_handle *command_buffers);
|
||||
extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
|
||||
const rt_alloc_command_buffer_info *,
|
||||
rt_command_buffer_handle *);
|
||||
extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
|
||||
const rt_submit_command_buffers_info *);
|
||||
#endif
|
||||
|
||||
extern rt_result InitFramegraphManager(void);
|
||||
@ -62,6 +62,7 @@ static bool LoadRenderer(void) {
|
||||
RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
|
||||
RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
|
||||
RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
|
||||
RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
|
||||
RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
|
||||
RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
|
||||
RETRIEVE_SYMBOL(CreateRenderTarget, rt_create_render_target_fn);
|
||||
@ -80,6 +81,7 @@ static bool LoadRenderer(void) {
|
||||
g_renderer.RegisterCVars = &rtRenRegisterCVars;
|
||||
g_renderer.Init = &rtRenInit;
|
||||
g_renderer.Shutdown = &rtRenShutdown;
|
||||
g_renderer.BeginFrame = &rtRenBeginFrame;
|
||||
g_renderer.CompilePipeline = &rtRenCompilePipeline;
|
||||
g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
|
||||
g_renderer.CreateRenderTarget = &rtRenCreateRenderTarget;
|
||||
@ -121,3 +123,7 @@ RT_DLLEXPORT void rtShutdownGFX(void) {
|
||||
ShutdownFramegraphManager();
|
||||
g_renderer.Shutdown();
|
||||
}
|
||||
|
||||
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
|
||||
g_renderer.BeginFrame(frame_id);
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
#include "main_loop.h"
|
||||
#include "runtime.h"
|
||||
#include "config.h"
|
||||
#include "gfx.h"
|
||||
|
||||
RT_CVAR_I(rt_MaxFrameLatency, "Maximum latency between update and rendering. Default: 2", 2);
|
||||
|
||||
@ -16,11 +17,11 @@ void UpdateThreadEntry(void *param) {
|
||||
while (!g_main_loop.shutdown) {
|
||||
/* Wait until the render thread has catched up */
|
||||
rtWaitOnSemaphore(&g_main_loop.update_proceed);
|
||||
rtLog("UT", "Processing %d", g_main_loop.u_frame_id);
|
||||
rtLog("UT", "Processing %u", g_main_loop.u_frame_id);
|
||||
|
||||
(g_main_loop.GameUpdate)();
|
||||
|
||||
rtLog("UT", "Finished %d", g_main_loop.u_frame_id);
|
||||
rtLog("UT", "Finished %u", g_main_loop.u_frame_id);
|
||||
g_main_loop.u_frame_id += 1;
|
||||
/* Signal the render thread that data is available */
|
||||
rtSignalSemaphore(&g_main_loop.render_proceed);
|
||||
@ -35,11 +36,12 @@ void RenderThreadEntry(void *param) {
|
||||
rtLog("RT", "RenderThread Entry");
|
||||
while (!g_main_loop.shutdown) {
|
||||
rtWaitOnSemaphore(&g_main_loop.render_proceed);
|
||||
rtLog("RT", "Processing %d", g_main_loop.r_frame_id);
|
||||
rtLog("RT", "Processing %u", g_main_loop.r_frame_id);
|
||||
|
||||
rtBeginGFXFrame(g_main_loop.r_frame_id);
|
||||
(g_main_loop.GameRender)();
|
||||
|
||||
rtLog("RT", "Finished %d", g_main_loop.r_frame_id);
|
||||
rtLog("RT", "Finished %u", g_main_loop.r_frame_id);
|
||||
g_main_loop.r_frame_id += 1;
|
||||
/* Signal the update thread that we have finished and it can proceed */
|
||||
rtSignalSemaphore(&g_main_loop.update_proceed);
|
||||
|
@ -8,8 +8,8 @@ typedef void rt_main_loop_update_fn(void);
|
||||
typedef void rt_main_loop_render_fn(void);
|
||||
|
||||
typedef struct {
|
||||
int u_frame_id;
|
||||
int r_frame_id;
|
||||
unsigned int u_frame_id;
|
||||
unsigned int r_frame_id;
|
||||
|
||||
rt_semaphore update_proceed;
|
||||
rt_semaphore render_proceed;
|
||||
|
@ -5,6 +5,7 @@ runtime_lib = library('rt',
|
||||
# Project Sources
|
||||
'aio.h',
|
||||
'app.h',
|
||||
'atomics.h',
|
||||
'buffer_manager.h',
|
||||
'compression.h',
|
||||
'config.h',
|
||||
|
@ -13,6 +13,25 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Handles for backend objects */
|
||||
|
||||
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255
|
||||
|
||||
#define RT_RENDER_BACKEND_HANDLE(name) \
|
||||
typedef struct { \
|
||||
uint32_t version : 8; \
|
||||
uint32_t index : 24; \
|
||||
} name
|
||||
|
||||
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
|
||||
|
||||
#undef RT_RENDER_BACKEND_HANDLE
|
||||
|
||||
/* Init data for the renderer */
|
||||
|
||||
#ifdef _WIN32
|
||||
struct HINSTANCE__;
|
||||
struct HWND__;
|
||||
@ -30,6 +49,14 @@ struct rt_renderer_init_info_s {
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Argument types for render commands */
|
||||
|
||||
typedef enum {
|
||||
RT_GRAPHICS_QUEUE,
|
||||
RT_COMPUTE_QUEUE,
|
||||
RT_TRANSFER_QUEUE,
|
||||
} rt_gpu_queue;
|
||||
|
||||
typedef struct {
|
||||
rt_resource_id vertex_shader;
|
||||
rt_resource_id fragment_shader;
|
||||
@ -86,41 +113,39 @@ typedef struct {
|
||||
size_t bytecode_length;
|
||||
} rt_shader_info;
|
||||
|
||||
/* Handles for backend objects */
|
||||
typedef struct {
|
||||
rt_gpu_queue target_queue;
|
||||
} rt_alloc_command_buffer_info;
|
||||
|
||||
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255
|
||||
typedef struct {
|
||||
const rt_command_buffer_handle *command_buffers;
|
||||
const rt_gpu_semaphore_handle *wait_semaphores;
|
||||
const rt_gpu_semaphore_handle *signal_semaphores;
|
||||
uint32_t command_buffer_count;
|
||||
uint32_t wait_semaphore_count;
|
||||
uint32_t signal_semaphore_count;
|
||||
} rt_submit_command_buffers_info;
|
||||
|
||||
#define RT_RENDER_BACKEND_HANDLE(name) \
|
||||
typedef struct { \
|
||||
uint32_t version : 8; \
|
||||
uint32_t index : 24; \
|
||||
} name
|
||||
|
||||
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_render_command_buffer_handle);
|
||||
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
|
||||
|
||||
#undef RT_RENDER_BACKEND_HANDLE
|
||||
/* Renderer API */
|
||||
|
||||
typedef void rt_register_renderer_cvars_fn(void);
|
||||
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
|
||||
typedef void rt_shutdown_renderer_fn(void);
|
||||
typedef void rt_begin_frame_fn(unsigned int frame_id);
|
||||
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
|
||||
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
|
||||
typedef rt_render_target_handle rt_create_render_target_fn(const rt_render_target_info *info);
|
||||
typedef void rt_destroy_render_target_fn(rt_render_target_handle handle);
|
||||
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
|
||||
rt_render_command_buffer_handle *p_command_buffers,
|
||||
rt_gpu_semaphore_handle *p_semaphores);
|
||||
typedef rt_result
|
||||
rt_submit_command_buffers_fn(uint32_t count,
|
||||
const rt_render_command_buffer_handle *command_buffers);
|
||||
const rt_alloc_command_buffer_info *info,
|
||||
rt_command_buffer_handle *p_command_buffers);
|
||||
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, const rt_submit_command_buffers_info *info);
|
||||
|
||||
typedef struct {
|
||||
rt_register_renderer_cvars_fn *RegisterCVars;
|
||||
rt_init_renderer_fn *Init;
|
||||
rt_shutdown_renderer_fn *Shutdown;
|
||||
rt_begin_frame_fn *BeginFrame;
|
||||
rt_compile_pipeline_fn *CompilePipeline;
|
||||
rt_destroy_pipeline_fn *DestroyPipeline;
|
||||
rt_create_render_target_fn *CreateRenderTarget;
|
||||
|
@ -28,6 +28,8 @@ extern "C" {
|
||||
#define RT_UNUSED(x) ((void)sizeof((x)))
|
||||
#define RT_ARRAY_COUNT(x) (sizeof((x)) / sizeof((x)[0]))
|
||||
|
||||
#define RT_RESTRICT_VALUE_TO_BOUNDS(v, lower, upper) (((v) < (lower)) ? (lower) : (((v) > (upper)) ? (upper) : (v)))
|
||||
|
||||
#define RT_KB(n) ((n)*1024U)
|
||||
#define RT_MB(n) ((n)*1024U * 1024U)
|
||||
#define RT_GB(n) ((n)*1024U * 1024U * 1024U)
|
||||
|
Loading…
Reference in New Issue
Block a user