/* rtengine/src/renderer/vk/command_buffers.c */

#include "gpu.h"
#include "gpu_sync.h"
#include "swapchain.h"
#include "runtime/atomics.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "gfx/renderer_api.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxCommandPools,
"Maximum number of command pools that can be created. Default: 32",
32);
RT_CVAR_I(
rt_VkCommandBufferRingBufferSize,
"Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
512);
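/* Per-thread command pool set. Each recording thread lazily acquires one slot.
 * The flat `pools` array provides backing storage for up to three queue families
 * times the supported frames in flight; graphics_pools, compute_pools and
 * transfer_pools point into it, or alias each other when queue families are shared. */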
typedef struct {
VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
uint32_t distinct_pool_count;
VkCommandPool *compute_pools;
VkCommandPool *graphics_pools;
VkCommandPool *transfer_pools;
} rt_thread_pools;
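/* Entry in the global command buffer ring buffer. The version is stored in the
 * handle returned to the caller and checked on every lookup to detect stale handles. */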
typedef struct {
VkCommandBuffer command_buffer;
uint32_t version;
rt_gpu_queue target_queue;
} rt_command_buffer;
static rt_thread_pools *_pools;
static uint32_t _next_pools;
static RT_THREAD_LOCAL unsigned int t_first_pool;
static rt_command_buffer *_command_buffers;
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
* index. */
static uint32_t _next_command_buffer;
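/* Allocates the global pool table and the command buffer ring buffer. */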
rt_result InitCommandBufferManagement(void) {
_pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
if (!_pools)
return RT_OUT_OF_MEMORY;
_command_buffers =
calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
if (!_command_buffers) {
free(_pools);
return RT_OUT_OF_MEMORY;
}
/* We keep 0 free as a "Not initialized" value for t_first_pool.
* The atomicinc used to acquire a pool returns the incremented value, so 0 is never returned.
*/
_next_pools = 0;
return RT_SUCCESS;
}
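/* Destroys every command pool owned by one thread's pool set. */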
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
}
void ShutdownCommandBufferManagement(void) {
    /* _next_pools is the index of the last acquired pool slot; slot 0 is never used. */
    for (uint32_t i = 1; i <= _next_pools; ++i) {
        DestroyPools(&_pools[i]);
    }
    free(_pools);
    free(_command_buffers);
}
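/* Resets the command pools belonging to the given frame slot (frame_id modulo
 * frames in flight) for every acquired thread pool set, releasing the resources
 * of all command buffers allocated from them. */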
void rtResetCommandPools(unsigned int frame_id) {
unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
    for (uint32_t i = 1; i <= _next_pools; ++i) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].graphics_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
}
if (_pools[i].compute_pools != _pools[i].graphics_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].compute_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
}
}
if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
_pools[i].transfer_pools != _pools[i].compute_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].transfer_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
}
}
}
}
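/* Creates one transient command pool per frame in flight for each distinct queue
 * family. Compute and transfer pools are only created when their families differ
 * from the graphics (and, for transfer, compute) family; otherwise the pointers
 * alias the already created pools. */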
static rt_result CreatePools(rt_thread_pools *pools) {
/* Graphics pools */
pools->graphics_pools = pools->pools;
pools->distinct_pool_count = 0;
VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.graphics_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&graphics_info,
g_gpu.alloc_cb,
&pools->graphics_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a graphics command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
if (g_gpu.compute_family != g_gpu.graphics_family) {
VkCommandPoolCreateInfo compute_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.compute_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->compute_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&compute_info,
g_gpu.alloc_cb,
&pools->compute_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a compute command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else {
pools->compute_pools = pools->graphics_pools;
}
if (g_gpu.transfer_family != g_gpu.graphics_family &&
g_gpu.transfer_family != g_gpu.compute_family) {
VkCommandPoolCreateInfo transfer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.transfer_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&transfer_info,
g_gpu.alloc_cb,
&pools->transfer_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a transfer command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else if (g_gpu.transfer_family == g_gpu.graphics_family) {
pools->transfer_pools = pools->graphics_pools;
} else if (g_gpu.transfer_family == g_gpu.compute_family) {
pools->transfer_pools = pools->compute_pools;
}
return RT_SUCCESS;
}
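/* Allocates `count` command buffers from the calling thread's pools and returns
 * versioned handles. Slots are taken from the global ring buffer; each command
 * buffer is begun immediately with ONE_TIME_SUBMIT usage. */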
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers) {
rt_thread_pools *pools = &_pools[t_first_pool];
if (t_first_pool == 0) {
/* Acquire pools */
t_first_pool = rtAtomic32Inc(&_next_pools);
RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
pools = &_pools[t_first_pool];
rt_result create_res = CreatePools(pools);
if (create_res != RT_SUCCESS)
return create_res;
}
if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
return RT_OUT_OF_MEMORY;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
rt_result result = RT_SUCCESS;
/* TODO: We should probably batch allocations of the same type */
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
for (uint32_t i = 0; i < count; ++i) {
uint32_t slot = (start + i) % mod;
_command_buffers[slot].version =
(_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
if (_command_buffers[slot].version == 0)
_command_buffers[slot].version = 1;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (info[i].target_queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (info[i].target_queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
_command_buffers[slot].target_queue = info[i].target_queue;
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
if (vkAllocateCommandBuffers(g_gpu.device,
&alloc_info,
&_command_buffers[slot].command_buffer) != VK_SUCCESS) {
result = RT_UNKNOWN_ERROR;
break;
}
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(_command_buffers[slot].command_buffer, &begin_info);
p_command_buffers[i].index = (slot + 1);
p_command_buffers[i].version = _command_buffers[slot].version;
}
return result;
}
#define RT_VK_LOG_SUBMIT_INFO 1
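/* Validates the submitted handles (version and target queue), ends the command
 * buffers and submits them in a single vkQueueSubmit2 batch together with the
 * requested wait and signal semaphores. */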
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
uint32_t count = info->command_buffer_count;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
rt_result result = RT_SUCCESS;
VkQueue target_queue = rtGetQueue(queue);
VkCommandBufferSubmitInfo *command_buffers =
RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count);
if (!command_buffers) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *wait_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->wait_semaphore_count);
if (!wait_semaphores && info->wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count);
if (!signal_semaphores && info->signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = info->wait_semaphore_count;
uint32_t signal_count = info->signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->wait_semaphores[i]),
.value = info->wait_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->signal_semaphores[i]),
.value = info->signal_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < count; ++i) {
if (!RT_IS_HANDLE_VALID(info->command_buffers[i])) {
rtLog("vk", "Tried to submit an invalid command buffer.");
result = RT_INVALID_VALUE;
goto out;
}
uint32_t slot = info->command_buffers[i].index - 1;
if (_command_buffers[slot].version != info->command_buffers[i].version) {
rtLog("vk",
"Mismatch between handle version and stored version while submitting a command "
"buffer");
result = RT_INVALID_VALUE;
goto out;
}
if (_command_buffers[slot].target_queue != queue) {
rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
result = RT_INVALID_VALUE;
goto out;
}
command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
command_buffers[i].pNext = NULL;
command_buffers[i].deviceMask = 0;
command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer;
vkEndCommandBuffer(command_buffers[i].commandBuffer);
}
#if RT_VK_LOG_SUBMIT_INFO
{
const char *queue_str = "<invalid>";
if (queue == RT_GRAPHICS_QUEUE)
queue_str = "GRAPHICS";
else if (queue == RT_COMPUTE_QUEUE)
queue_str = "COMPUTE";
else if (queue == RT_TRANSFER_QUEUE)
queue_str = "TRANSFER";
rtLog("vk", "Submit Info");
rtLog("vk", "Queue: %s", queue_str);
rtLog("vk", "Command Buffers: %u", count);
rtLog("vk", " - TODO: More Info");
rtLog("vk", "Wait Semaphores:");
for (uint32_t i = 0; i < wait_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->wait_semaphores[i].version,
info->wait_semaphores[i].index,
info->wait_values[i]);
}
rtLog("vk", "Signal Semaphores:");
for (uint32_t i = 0; i < signal_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->signal_semaphores[i].version,
info->signal_semaphores[i].index,
info->signal_values[i]);
}
}
#endif
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphores,
.pSignalSemaphoreInfos = signal_semaphores,
.commandBufferInfoCount = count,
.pCommandBufferInfos = command_buffers,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}
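/* Resolves a command buffer handle to the underlying VkCommandBuffer.
 * Returns VK_NULL_HANDLE for invalid or stale handles. */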
VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf) {
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
if (!RT_IS_HANDLE_VALID(cmdbuf))
return VK_NULL_HANDLE;
uint32_t slot = (cmdbuf.index - 1) % mod;
if (_command_buffers[slot].version != cmdbuf.version) {
return VK_NULL_HANDLE;
}
return _command_buffers[slot].command_buffer;
}
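/* Allocates a single command buffer from the calling thread's pools, bypassing
 * the handle ring buffer. Unlike AllocCommandBuffers, recording is not begun
 * here; the caller is responsible for vkBeginCommandBuffer/vkEndCommandBuffer. */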
VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue) {
rt_thread_pools *pools = &_pools[t_first_pool];
if (t_first_pool == 0) {
/* Acquire pools */
t_first_pool = rtAtomic32Inc(&_next_pools);
RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
pools = &_pools[t_first_pool];
rt_result create_res = CreatePools(pools);
if (create_res != RT_SUCCESS)
return VK_NULL_HANDLE;
}
if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
return VK_NULL_HANDLE;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
VkCommandBuffer cmdbuf;
if (vkAllocateCommandBuffers(g_gpu.device, &alloc_info, &cmdbuf) != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
return cmdbuf;
}
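/* Submits a single, already ended command buffer with optional semaphore waits
 * and signals and an optional fence. wait_values/signal_values may be NULL, in
 * which case 0 is used as the semaphore value. */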
rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
const VkSemaphore *wait_semaphores,
const uint32_t *wait_values,
uint32_t wait_semaphore_count,
const VkSemaphore *signal_semaphores,
const uint32_t *signal_values,
uint32_t signal_semaphore_count,
rt_gpu_queue queue,
VkFence fence) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
VkQueue target_queue = rtGetQueue(queue);
rt_result result = RT_SUCCESS;
VkSemaphoreSubmitInfo *wait_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, wait_semaphore_count);
if (!wait_semaphore_info && wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, signal_semaphore_count);
if (!signal_semaphore_info && signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = wait_semaphore_count;
uint32_t signal_count = signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = wait_semaphores[i],
.value = (wait_values) ? wait_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphore_info[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = signal_semaphores[i],
.value = (signal_values) ? signal_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphore_info[i] = semaphore_info;
}
VkCommandBufferSubmitInfo command_buffer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.deviceMask = 0,
.commandBuffer = command_buffer,
};
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphore_info,
.pSignalSemaphoreInfos = signal_semaphore_info,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &command_buffer_info,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}