Try to improve resource layout transition

This commit is contained in:
Kevin Trogant 2024-03-17 15:10:45 +01:00
parent 2a2a743c78
commit 6f89dd4c46
20 changed files with 522 additions and 61 deletions

27
assets/forward.framegraph Normal file
View File

@ -0,0 +1,27 @@
render_targets {
swapchain_out {
format SWAPCHAIN;
width SWAPCHAIN;
height SWAPCHAIN;
sample_count 1;
}
}
passes {
forward {
type GRAPHICS;
writes {
swapchain_out {
clear_value {
r 1.0;
g 1.0;
b 1.0;
a 1.0;
}
clear YES;
discard NO;
}
}
}
}

View File

@ -18,7 +18,7 @@ static void PassPrepare(rt_render_pass_id pass,
uint32_t write_count,
const rt_render_target_read *reads,
uint32_t read_count) {
//rtLog("GAME", "Prepare pass %x", pass);
// rtLog("GAME", "Prepare pass %x", pass);
}
static void PassExecute(rt_render_pass_id pass,
@ -26,7 +26,7 @@ static void PassExecute(rt_render_pass_id pass,
uint32_t write_count,
const rt_render_target_read *reads,
uint32_t read_count) {
//rtLog("GAME", "Execute pass %x", pass);
// rtLog("GAME", "Execute pass %x", pass);
}
static void PassFinalize(rt_render_pass_id pass,
@ -34,7 +34,7 @@ static void PassFinalize(rt_render_pass_id pass,
uint32_t write_count,
const rt_render_target_read *reads,
uint32_t read_count) {
//rtLog("GAME", "Finalize pass %x", pass);
// rtLog("GAME", "Finalize pass %x", pass);
}
/* Called after the runtime has finished its initialization and before entering the main-loop*/
@ -46,6 +46,19 @@ void Init(void) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
#if 0
rt_resource_id resid = rtGetResourceID("assets/forward.framegraph");
size_t size = rtGetResourceSize(resid);
rt_resource *res = rtArenaPush(temp.arena, size);
rtGetResource(resid, res);
_framegraph = rtCreateFramegraph(res->data);
rt_render_pass_bind_fns bind = {.Execute = PassExecute,
.Prepare = PassPrepare,
.Finalize = PassFinalize};
rtBindRenderPass(_framegraph, rtCalculateRenderPassID("forward", sizeof("forward") - 1), &bind);
#else
rt_resource_id resid = rtGetResourceID("assets/test.framegraph");
size_t size = rtGetResourceSize(resid);
rt_resource *res = rtArenaPush(temp.arena, size);
@ -58,6 +71,7 @@ void Init(void) {
.Finalize = PassFinalize};
rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass0", sizeof("pass0") - 1), &bind);
rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass1", sizeof("pass1") - 1), &bind);
#endif
}
/* Called after exiting the main-loop and before the runtime starts its shutdown */

10
src/gfx/builtin_objects.c Normal file
View File

@ -0,0 +1,10 @@
#define RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
#include "builtin_objects.h"
rt_builtin_render_object_types g_builtin_render_object_types;
/* Registers the render object types used by the builtin graphics passes.
 *
 * Each value returned by rtRegisterRenderObjectType is stored in the matching
 * field of g_builtin_render_object_types. Always returns RT_SUCCESS. */
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void) {
    rt_builtin_render_object_types *types = &g_builtin_render_object_types;

    /* "render_mesh" objects carry one rt_render_mesh each. */
    types->render_mesh = rtRegisterRenderObjectType(sizeof(rt_render_mesh), "render_mesh");

    return RT_SUCCESS;
}

39
src/gfx/builtin_objects.h Normal file
View File

@ -0,0 +1,39 @@
#ifndef RT_GFX_BUILTIN_OBJECTS_H
#define RT_GFX_BUILTIN_OBJECTS_H
/* Render Object types used by the builtin graphics passes.
*
* As a user you are free to not use these, but then you
* also cannot use the builtin render passes. */
#include "renderer_api.h"
#include "render_list.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Payload of the "render_mesh" render object type.
 *
 * NOTE(review): the field meanings below are inferred from their names only;
 * confirm against the passes that consume this type. */
typedef struct {
/* Presumably the vertex buffer of the mesh. */
rt_buffer_handle vbo;
/* Presumably the index buffer of the mesh. */
rt_buffer_handle ibo;
/* Presumably the number of vertices stored in vbo. */
uint32_t vertex_count;
/* Presumably the number of indices stored in ibo. */
uint32_t index_count;
} rt_render_mesh;
/* Collection of the render object types registered for the builtin passes.
 * Filled in by rtRegisterBuiltinRenderObjectTypes(). */
typedef struct {
/* Type registered for rt_render_mesh objects. */
rt_render_object_type render_mesh;
} rt_builtin_render_object_types;
#ifndef RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
extern RT_DLLIMPORT rt_builtin_render_object_types g_builtin_render_object_types;
#endif
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -647,14 +647,14 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
}
/* Find the last pass that writes to the swapchain */
uint32_t last_swapchain_write = 0;
uint32_t last_swapchain_write = framegraph->pass_count - 1;
for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) {
if (framegraph->passes[i].writes_swapchain) {
last_swapchain_write = i;
break;
}
}
/* Find the first pass that reads the swapchain 0*/
/* Find the first pass that reads the swapchain */
uint32_t first_swapchain_read = 0;
for (uint32_t i = 0; framegraph->pass_count; ++i) {
if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) {
@ -705,19 +705,25 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
bool is_graphics_pass =
framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS;
if (is_graphics_pass) {
graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
framegraph->passes[pass_idx].write_count;
graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
if (framegraph->passes[pass_idx].reads_swapchain)
if (framegraph->passes[pass_idx].reads_swapchain ||
pass_idx == first_swapchain_read)
graphics_wait_semaphore_count += 1;
if (framegraph->passes[pass_idx].writes_swapchain)
if (framegraph->passes[pass_idx].writes_swapchain ||
pass_idx == last_swapchain_write)
graphics_signal_semaphore_count += 1;
++graphics_command_buffer_count;
} else {
compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
framegraph->passes[pass_idx].write_count;
compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
if (framegraph->passes[pass_idx].reads_swapchain)
if (framegraph->passes[pass_idx].reads_swapchain ||
pass_idx == first_swapchain_read)
compute_wait_semaphore_count += 1;
if (framegraph->passes[pass_idx].writes_swapchain)
if (framegraph->passes[pass_idx].writes_swapchain ||
pass_idx == last_swapchain_write)
compute_signal_semaphore_count += 1;
++compute_command_buffer_count;
}
@ -850,6 +856,12 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
signal_semaphores[*signal_count] = rt->semaphore;
signal_values[*signal_count] = signal_value_base + execution_level + 1;
*signal_count += 1;
if (signal_value_base >= 200) {
wait_semaphores[*wait_count] = rt->semaphore;
wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1;
*wait_count += 1;
}
}
if (pass_idx == first_swapchain_read) {
wait_semaphores[*wait_count] = swapchain_available;

View File

@ -1,11 +1,13 @@
gfx_deps = [thread_dep, m_dep]
gfx_lib = library('rtgfx',
# Project Sources
'builtin_objects.h',
'effect.h',
'gfx.h',
'renderer_api.h',
'render_list.h',
'builtin_objects.c',
'gfx_framegraph.c',
'gfx_main.c',
'render_list.c',

View File

@ -30,6 +30,7 @@ RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);
#undef RT_RENDER_BACKEND_HANDLE
@ -126,6 +127,34 @@ typedef struct {
uint64_t initial_value;
} rt_gpu_semaphore_info;
/* Kind of gpu buffer, as requested through rt_buffer_info.type. */
typedef enum {
RT_BUFFER_TYPE_VERTEX,
RT_BUFFER_TYPE_INDEX,
RT_BUFFER_TYPE_UNIFORM,
RT_BUFFER_TYPE_STORAGE,
/* Last enumerator; its value equals the number of buffer types above. */
RT_BUFFER_TYPE_count
} rt_buffer_type;
/* Expected update pattern of a gpu buffer, as requested through
 * rt_buffer_info.usage. */
typedef enum {
/* Create once, never change the data. */
RT_BUFFER_USAGE_STATIC,
/* Update occasionally (after a number of frames) */
RT_BUFFER_USAGE_DYNAMIC,
/* Create, use once and then discard */
RT_BUFFER_USAGE_TRANSIENT,
/* Last enumerator; its value equals the number of usage modes above. */
RT_BUFFER_USAGE_count,
} rt_buffer_usage;
/* Description of a single buffer passed to rt_create_buffers_fn. */
typedef struct {
/* Size of the buffer (presumably in bytes - TODO confirm). */
size_t size;
/* What kind of buffer to create. */
rt_buffer_type type;
/* How often the contents are expected to change. */
rt_buffer_usage usage;
/* NOTE(review): presumably the initial contents of the buffer; whether NULL
 * is allowed is not visible here - confirm with the backend. */
const void *data;
} rt_buffer_info;
typedef enum {
RT_PASS_LOAD_MODE_LOAD,
RT_PASS_LOAD_MODE_CLEAR,
@ -200,6 +229,9 @@ typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handl
typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore);
typedef rt_gpu_semaphore_handle rt_get_swapchain_available_semaphore_fn(void);
typedef rt_gpu_semaphore_handle rt_get_render_finished_semaphore_fn(void);
/* Backend entry point that creates buffers.
 * NOTE(review): presumably info and p_buffers are arrays of count elements and
 * the created handles are written to p_buffers - confirm with the backends. */
typedef rt_result
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
/* Backend entry point that destroys buffers.
 * NOTE(review): presumably buffers is an array of count handles. */
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
const rt_cmd_begin_pass_info *info);
@ -226,6 +258,8 @@ typedef struct {
rt_get_gpu_semaphore_value_fn *GetSemaphoreValue;
rt_get_swapchain_available_semaphore_fn *GetSwapchainAvailableSemaphore;
rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
rt_create_buffers_fn *CreateBuffers;
rt_destroy_buffers_fn *DestroyBuffers;
/* Command Buffer Functions */
rt_cmd_begin_pass_fn *CmdBeginPass;

View File

@ -103,6 +103,19 @@ rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
return (rt_gpu_semaphore_handle){.index = 2, .version = 1};
}
/* Null-renderer stub for CreateBuffers: ignores info and hands p_buffers and
 * count to RETURN_HANDLE_ARRAY_STUB.
 *
 * NOTE(review): RETURN_HANDLE_ARRAY_STUB is defined outside this view. It
 * presumably fills p_buffers with placeholder handles; if it also returns,
 * the trailing return statement is unreachable - confirm. */
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB(p_buffers, count);
return RT_SUCCESS;
}
/* Null-renderer stub for DestroyBuffers: both arguments are ignored and
 * nothing is released. */
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
    RT_UNUSED(buffers);
    RT_UNUSED(count);
}
void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmd,
const rt_cmd_begin_pass_info *info) {
RT_UNUSED(cmd);
@ -119,4 +132,4 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
RT_UNUSED(cmd);
RT_UNUSED(target);
RT_UNUSED(state);
}
}

69
src/renderer/vk/buffers.c Normal file
View File

@ -0,0 +1,69 @@
#include "gpu.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/threading.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);
/* One slot of the gpu buffer table (_buffers). */
typedef struct rt_buffer_data_s {
/* Vulkan buffer object; VK_NULL_HANDLE marks a slot that holds no buffer
 * (this is what ShutdownBufferManagement tests). */
VkBuffer buffer;
/* VMA allocation, destroyed together with buffer via vmaDestroyBuffer. */
VmaAllocation allocation;
/* Presumably the size of the buffer in bytes - TODO confirm. */
size_t size;
rt_buffer_usage usage;
rt_buffer_type type;
/* Per-slot lock, destroyed in ShutdownBufferManagement. */
rt_rwlock lock;
/* Link to another slot. NOTE(review): presumably the next entry of the free
 * list headed by _first_free; no code in this file sets it yet. */
struct rt_buffer_data_s *next_free;
} rt_buffer_data;
/* Slot array, allocated in InitBufferManagement with rt_VkMaxBufferCount entries. */
static rt_buffer_data *_buffers;
/* Slot that CreateBuffers inspects to decide whether a free slot exists. */
static rt_buffer_data *_first_free;
/* Mutex held while _first_free is read. */
static rt_mutex *_list_lock;
/* Allocates the slot array for gpu buffers.
 *
 * The number of slots is read from the rt_VkMaxBufferCount cvar.
 *
 * Returns RT_OUT_OF_MEMORY if the allocation fails, RT_SUCCESS otherwise.
 *
 * NOTE(review): this function neither creates _list_lock nor links the slots
 * into the free list headed by _first_free; confirm where that is supposed
 * to happen before CreateBuffers is used. */
rt_result InitBufferManagement(void) {
    size_t n = (size_t)rt_VkMaxBufferCount.i;

    /* The array must be zero-initialized: ShutdownBufferManagement treats
     * every slot whose buffer is not VK_NULL_HANDLE as a live buffer and
     * destroys it. With malloc the slots would contain indeterminate values.
     * calloc also guards the n * sizeof multiplication against overflow. */
    _buffers = calloc(n, sizeof(rt_buffer_data));
    if (!_buffers)
        return RT_OUT_OF_MEMORY;

    return RT_SUCCESS;
}
/* Destroys every buffer that is still alive and releases the slot array.
 *
 * Safe to call when InitBufferManagement failed or never ran: a NULL slot
 * array and a NULL list lock are skipped. All file-scope pointers are reset,
 * so no dangling pointer is left behind after the call. */
void ShutdownBufferManagement(void) {
    if (_buffers) {
        for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
            /* Slots without a buffer have nothing to release. */
            if (_buffers[i].buffer == VK_NULL_HANDLE)
                continue;
            vmaDestroyBuffer(g_gpu.allocator, _buffers[i].buffer, _buffers[i].allocation);
            rtDestroyRWLock(&_buffers[i].lock);
            memset(&_buffers[i], 0, sizeof(_buffers[i]));
        }
        free(_buffers);
        _buffers = NULL;
    }

    _first_free = NULL;

    /* No code in this file creates the lock, so it may still be NULL here. */
    if (_list_lock) {
        rtDestroyMutex(_list_lock);
        _list_lock = NULL;
    }
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);
/* Vulkan backend entry point for buffer creation (work in progress).
 *
 * For each of the count requested buffers, the list lock is taken and
 * _first_free is inspected. If no free slot exists, the buffers handled so
 * far are passed to rtRenDestroyBuffers and RT_OUT_OF_MEMORY is returned.
 * Otherwise RT_SUCCESS is returned.
 *
 * NOTE(review): the implementation is incomplete. It never removes the slot
 * from the free list, never reads info and never writes to p_buffers, so the
 * handles given to rtRenDestroyBuffers on the failure path were not set by
 * this function. */
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_list_lock);
rt_buffer_data *slot = _first_free;
if (!slot) {
/* Out of slots: release the lock before cleaning up the first i entries. */
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_OUT_OF_MEMORY;
}
rtUnlockMutex(_list_lock);
}
return RT_SUCCESS;
}
/* Vulkan backend entry point for buffer destruction.
 * Not implemented yet: the body is empty, so nothing is released. */
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
}

View File

@ -233,6 +233,8 @@ rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
return result;
}
#define RT_VK_LOG_SUBMIT_INFO 1
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
@ -313,6 +315,35 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
vkEndCommandBuffer(command_buffers[i].commandBuffer);
}
#if RT_VK_LOG_SUBMIT_INFO
{
const char *queue_str = "<invalid>";
if (queue == RT_GRAPHICS_QUEUE)
queue_str = "GRAPHICS";
else if (queue == RT_COMPUTE_QUEUE)
queue_str = "COMPUTE";
else if (queue == RT_TRANSFER_QUEUE)
queue_str = "TRANSFER";
rtLog("vk", "Submit Info");
rtLog("vk", "Queue: %s", queue_str);
rtLog("vk", "Command Buffers: %u", count);
rtLog("vk", " - TODO: More Info");
rtLog("vk", "Wait Semaphores:");
for (uint32_t i = 0; i < wait_count; ++i) {
rtLog("vk", " - %u:%u Value %u", info->wait_semaphores[i].version,
info->wait_semaphores[i].index, info->wait_values[i]);
}
rtLog("vk", "Signal Semaphores:");
for (uint32_t i = 0; i < signal_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->signal_semaphores[i].version,
info->signal_semaphores[i].index,
info->signal_values[i]);
}
}
#endif
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,

View File

@ -161,37 +161,120 @@ void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdbuf_handle) {
#endif
}
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target,
rt_render_target_state new_state) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
/* Non-layout transition barrier */
static void ExecuteRenderTargetBarrier(rt_render_target *rt,
uint32_t image_index,
VkCommandBuffer cmdbuf) { /* Determine old and new layout */
VkImageLayout layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
layout = VK_IMAGE_LAYOUT_UNDEFINED;
break;
}
rt_render_target *rt = rtGetRenderTarget(render_target);
if (!rt) {
rtLog("vk", "Tried to transition invalid render target");
return;
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = "Render Target Barrier",
.color = {.13f, .54f, .13f, .75f},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
/* Determine access flags */
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
dst_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
dst_access = VK_ACCESS_2_SHADER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
}
if (rt->states[image_index] == new_state)
return;
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = layout,
.newLayout = layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmdbuf, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
static void DoLayoutTransition(rt_render_target *rt,
uint32_t image_index,
rt_render_target_state new_state,
VkCommandBuffer cmdbuf) {
/* Determine old and new layout */
VkImageLayout old_layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
old_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
old_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
break;
default:
old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@ -200,15 +283,11 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
VkImageLayout new_layout;
switch (new_state) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
new_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
new_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
break;
default:
new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@ -227,23 +306,59 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
/* Determine access flags */
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access = VK_ACCESS_2_SHADER_WRITE_BIT;
src_stage = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
}
if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
dst_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
dst_access = VK_ACCESS_2_SHADER_READ_BIT;
dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
}
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT | VK_ACCESS_2_MEMORY_READ_BIT,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
@ -260,3 +375,24 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
rt->states[image_index] = new_state;
}
/* Records a barrier for render_target into the given command buffer.
 *
 * If the image is already in new_state, a barrier without a layout change is
 * recorded (ExecuteRenderTargetBarrier); otherwise the image is moved to
 * new_state via DoLayoutTransition. An invalid render target handle is
 * logged and ignored. */
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
                                                   rt_render_target_handle render_target,
                                                   rt_render_target_state new_state) {
    GET_CMDBUF(cmdbuf, cmdbuf_handle)

    /* The swapchain render target uses the swapchain image index stored in
     * the current frame data; every other target uses the frame id modulo
     * the number of frames in flight. */
    uint32_t image_index;
    if (render_target.index != g_renderer.GetSwapchainRenderTarget().index)
        image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    else
        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;

    rt_render_target *target = rtGetRenderTarget(render_target);
    if (!target) {
        rtLog("vk", "Tried to transition invalid render target");
        return;
    }

    if (target->states[image_index] == new_state) {
        ExecuteRenderTargetBarrier(target, image_index, cmdbuf);
        return;
    }
    DoLayoutTransition(target, image_index, new_state, cmdbuf);
}

View File

@ -89,7 +89,8 @@ DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
severity_str = "ERROR";
rtLog("vk", "[%s] %s", severity_str, callbackData->pMessage);
RT_DEBUGBREAK;
if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
RT_DEBUGBREAK;
return VK_FALSE;
}
@ -645,6 +646,8 @@ extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
extern rt_result InitializeSempahoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
extern rt_result InitBufferManagement(void);
extern void ShutdownBufferManagement(void);
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
rtLog("vk", "Init");
@ -691,6 +694,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
if (res != RT_SUCCESS)
return res;
res = InitCommandBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = rtCreateSwapchain();
@ -705,6 +711,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
rtLog("vk", "Shutdown");
vkDeviceWaitIdle(g_gpu.device);
rtDestroySwapchain();
ShutdownBufferManagement();
ShutdownCommandBufferManagement();
ShutdownSemaphoreManagement();
ShutdownRenderTargetManagement();

View File

@ -16,6 +16,7 @@ if vk_dep.found()
'render_targets.h',
'swapchain.h',
'buffers.c',
'command_buffers.c',
'commands.c',
'frame.c',

View File

@ -9,6 +9,9 @@
#include <X11/Xlib.h>
#endif
#include <stdlib.h>
#include <string.h>
/* GFX */
#include "gfx/gfx.h"

View File

@ -42,7 +42,9 @@ static bool CreateImageAndView(VkExtent2D extent,
VkImageAspectFlagBits aspect,
VkImage *p_image,
VmaAllocation *p_allocation,
VkImageView *p_view) {
VkImageView *p_view,
const char *rt_name,
uint32_t image_index) {
uint32_t queue_families[3];
uint32_t distinct_queue_families = 1;
@ -140,6 +142,25 @@ static bool CreateImageAndView(VkExtent2D extent,
return false;
}
#ifdef RT_DEBUG
char name[260];
rtSPrint(name, 260, "%s (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)image,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 260, "%s [view] (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
name_info =
(VkDebugUtilsObjectNameInfoEXT){.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)view,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE_VIEW};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
*p_image = image;
*p_allocation = allocation;
*p_view = view;
@ -201,6 +222,8 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
*/
rtUnlockWrite(&_lock);
const char *name = rtResolveConstRelptr(&info->name);
slot->render_target.match_swapchain = 0;
slot->render_target.image_count = g_swapchain.image_count;
for (unsigned int i = 0; i < g_swapchain.image_count; ++i) {
@ -243,7 +266,9 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
slot->render_target.aspect,
&slot->render_target.image[i],
&slot->render_target.allocation[i],
&slot->render_target.view[i])) {
&slot->render_target.view[i],
name,
i)) {
slot->render_target.image_count = i;
DestroyRenderTarget(slot);
goto out;
@ -332,7 +357,9 @@ void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, V
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j])) {
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to recreate swapchain-matching render target");
@ -349,7 +376,9 @@ void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, V
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j])) {
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to create additional render target images");

View File

@ -28,7 +28,9 @@ typedef struct {
static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
rt_device_swapchain_parameters params;
/* determine presentation mode. FIFO should always be available */
/* determine presentation mode. FIFO should always be available.
* TODO: If vsync is enabled, we should always choose FIFO.
*/
params.present_mode = VK_PRESENT_MODE_FIFO_KHR;
if (r_VkPreferMailboxMode.i) {
VkPresentModeKHR modes[6];
@ -43,6 +45,7 @@ static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
/* Determine surface format */
VkSurfaceFormatKHR formats[64];
uint32_t format_count = 64;
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, NULL);
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, formats);
params.surface_format = formats[0];
for (uint32_t i = 0; i < format_count; ++i) {

View File

@ -49,7 +49,7 @@ static bool DisplayErrorBox(const char *text) {
static void LogOut(const char *text) {
#ifdef _WIN32
WCHAR msg[256];
WCHAR msg[512];
rtUTF8ToWStr(text, msg, RT_ARRAY_COUNT(msg));
OutputDebugStringW(msg);
#endif
@ -57,7 +57,7 @@ static void LogOut(const char *text) {
}
RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) {
char buf[256];
char buf[512];
int at = rtSPrint(buf, RT_ARRAY_COUNT(buf) - 1, "[%s] ", subsystem);
va_list ap;
@ -73,7 +73,7 @@ RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) {
}
RT_DLLEXPORT void rtLog(const char *subsystem, const char *fmt, ...) {
char buf[256];
char buf[512];
int at = rtSPrint(buf, RT_ARRAY_COUNT(buf), "[%s] ", subsystem);
va_list ap;

View File

@ -10,7 +10,7 @@ struct rt_condition_var_s {
ptrdiff_t next_reusable;
};
#define MAX_CONDS 1024
#define MAX_CONDS 4096
rt_condition_var _conds[MAX_CONDS];
static ptrdiff_t _first_reusable = MAX_CONDS;
static ptrdiff_t _next = 0;
@ -75,7 +75,7 @@ struct rt_condition_var_s {
ptrdiff_t next_reusable;
};
#define MAX_CONDS 1024
#define MAX_CONDS 4096
rt_condition_var _conds[MAX_CONDS];
static ptrdiff_t _first_reusable = MAX_CONDS;
static ptrdiff_t _next = 0;

View File

@ -11,7 +11,7 @@ struct rt_mutex_s {
DWORD owner;
};
#define MAX_MUTEX 1024
#define MAX_MUTEX 4096
static rt_mutex _mutex[MAX_MUTEX];
static ptrdiff_t _first_reusable = MAX_MUTEX;
static ptrdiff_t _next = 0;
@ -106,7 +106,7 @@ struct rt_mutex_s {
ptrdiff_t next_reusable;
};
#define MAX_MUTEX 1024
#define MAX_MUTEX 4096
static rt_mutex _mutex[MAX_MUTEX];
static ptrdiff_t _first_reusable = MAX_MUTEX;
static ptrdiff_t _next = 0;

31
vk_layer_settings.txt Normal file
View File

@ -0,0 +1,31 @@
# The main, heavy-duty validation checks. Disabling them may be valuable early
# in the development cycle to reduce validation output while correcting
# parameter/object usage errors.
khronos_validation.validate_core = true
# Enable synchronization validation during command buffers recording. This
# feature reports resource access conflicts due to missing or incorrect
# synchronization operations between actions (Draw, Copy, Dispatch, Blit)
# reading or writing the same regions of memory.
khronos_validation.validate_sync = true
# Thread checks. In order to not degrade performance, it might be best to run
# your program with thread-checking disabled most of the time, enabling it
# occasionally for a quick sanity check or when debugging difficult application
# behaviors.
khronos_validation.thread_safety = true
# Specifies what action is to be taken when a layer reports information
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG
# Comma-delineated list of options specifying the types of messages to be reported
khronos_validation.report_flags = debug,error,perf,info,warn
# Enable limiting of duplicate messages.
khronos_validation.enable_message_limit = true
# Maximum number of times any single validation message should be reported.
khronos_validation.duplicate_message_limit = 3
# Enable once the implementation is more mature
khronos_validation.validate_best_practices = false