diff --git a/assets/forward.framegraph b/assets/forward.framegraph
new file mode 100644
index 0000000..9d66e4a
--- /dev/null
+++ b/assets/forward.framegraph
@@ -0,0 +1,27 @@
+render_targets {
+    swapchain_out {
+        format SWAPCHAIN;
+        width SWAPCHAIN;
+        height SWAPCHAIN;
+        sample_count 1;
+    }
+}
+
+passes {
+    forward {
+        type GRAPHICS;
+        writes {
+            swapchain_out {
+                clear_value {
+                    r 1.0;
+                    g 1.0;
+                    b 1.0;
+                    a 1.0;
+                }
+
+                clear YES;
+                discard NO;
+            }
+        }
+    }
+}
diff --git a/src/game/main.c b/src/game/main.c
index 445072c..ca262dd 100644
--- a/src/game/main.c
+++ b/src/game/main.c
@@ -18,7 +18,7 @@ static void PassPrepare(rt_render_pass_id pass,
                         uint32_t write_count,
                         const rt_render_target_read *reads,
                         uint32_t read_count) {
-    //rtLog("GAME", "Prepare pass %x", pass);
+    // rtLog("GAME", "Prepare pass %x", pass);
 }
 
 static void PassExecute(rt_render_pass_id pass,
@@ -26,7 +26,7 @@ static void PassExecute(rt_render_pass_id pass,
                         uint32_t write_count,
                         const rt_render_target_read *reads,
                         uint32_t read_count) {
-    //rtLog("GAME", "Execute pass %x", pass);
+    // rtLog("GAME", "Execute pass %x", pass);
 }
 
 static void PassFinalize(rt_render_pass_id pass,
@@ -34,7 +34,7 @@ static void PassFinalize(rt_render_pass_id pass,
                          uint32_t write_count,
                          const rt_render_target_read *reads,
                          uint32_t read_count) {
-    //rtLog("GAME", "Finalize pass %x", pass);
+    // rtLog("GAME", "Finalize pass %x", pass);
 }
 
 /* Called after the runtime has finished its initialization and before entering the main-loop */
@@ -46,6 +46,19 @@ void Init(void) {
 
     rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
 
+#if 0
+    rt_resource_id resid = rtGetResourceID("assets/forward.framegraph");
+    size_t size = rtGetResourceSize(resid);
+    rt_resource *res = rtArenaPush(temp.arena, size);
+    rtGetResource(resid, res);
+
+    _framegraph = rtCreateFramegraph(res->data);
+
+    rt_render_pass_bind_fns bind = {.Execute = PassExecute,
+                                    .Prepare = PassPrepare,
+                                    .Finalize = PassFinalize};
+    rtBindRenderPass(_framegraph, rtCalculateRenderPassID("forward", sizeof("forward") - 1), &bind);
+#else
     rt_resource_id resid = rtGetResourceID("assets/test.framegraph");
     size_t size = rtGetResourceSize(resid);
     rt_resource *res = rtArenaPush(temp.arena, size);
@@ -58,6 +71,7 @@ void Init(void) {
                                     .Finalize = PassFinalize};
     rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass0", sizeof("pass0") - 1), &bind);
     rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass1", sizeof("pass1") - 1), &bind);
+#endif
 }
 
 /* Called after exiting the main-loop and before the runtime starts its shutdown */
diff --git a/src/gfx/builtin_objects.c b/src/gfx/builtin_objects.c
new file mode 100644
index 0000000..9eb54a7
--- /dev/null
+++ b/src/gfx/builtin_objects.c
@@ -0,0 +1,10 @@
+#define RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
+#include "builtin_objects.h"
+
+rt_builtin_render_object_types g_builtin_render_object_types;
+
+RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void) {
+    g_builtin_render_object_types.render_mesh =
+        rtRegisterRenderObjectType(sizeof(rt_render_mesh), "render_mesh");
+    return RT_SUCCESS;
+}
\ No newline at end of file
diff --git a/src/gfx/builtin_objects.h b/src/gfx/builtin_objects.h
new file mode 100644
index 0000000..eda231c
--- /dev/null
+++ b/src/gfx/builtin_objects.h
@@ -0,0 +1,39 @@
+#ifndef RT_GFX_BUILTIN_OBJECTS_H
+#define RT_GFX_BUILTIN_OBJECTS_H
+
+/* Render Object types used by the builtin graphics passes.
+ *
+ * As a user you are free to not use these, but then you
+ * also cannot use the builtin render passes.
+ */
+
+#include "renderer_api.h"
+#include "render_list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    rt_buffer_handle vbo;
+    rt_buffer_handle ibo;
+    uint32_t vertex_count;
+    uint32_t index_count;
+} rt_render_mesh;
+
+
+typedef struct {
+    rt_render_object_type render_mesh;
+} rt_builtin_render_object_types;
+
+#ifndef RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
+extern RT_DLLIMPORT rt_builtin_render_object_types g_builtin_render_object_types;
+#endif
+
+RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gfx/gfx_framegraph.c b/src/gfx/gfx_framegraph.c
index 3b55dec..3260763 100644
--- a/src/gfx/gfx_framegraph.c
+++ b/src/gfx/gfx_framegraph.c
@@ -647,14 +647,14 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
     }
 
     /* Find the last pass that writes to the swapchain */
-    uint32_t last_swapchain_write = 0;
+    uint32_t last_swapchain_write = framegraph->pass_count - 1;
     for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) {
         if (framegraph->passes[i].writes_swapchain) {
            last_swapchain_write = i;
            break;
        }
     }
-    /* Find the first pass that reads the swapchain 0*/
+    /* Find the first pass that reads the swapchain */
     uint32_t first_swapchain_read = 0;
     for (uint32_t i = 0; framegraph->pass_count; ++i) {
         if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) {
@@ -705,19 +705,25 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
         bool is_graphics_pass = framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS;
         if (is_graphics_pass) {
-            graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
+            graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
+                                             framegraph->passes[pass_idx].write_count;
             graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
-            if (framegraph->passes[pass_idx].reads_swapchain)
+            if (framegraph->passes[pass_idx].reads_swapchain ||
+                pass_idx == first_swapchain_read)
                 graphics_wait_semaphore_count += 1;
-            if (framegraph->passes[pass_idx].writes_swapchain)
+            if (framegraph->passes[pass_idx].writes_swapchain ||
+                pass_idx == last_swapchain_write)
                 graphics_signal_semaphore_count += 1;
             ++graphics_command_buffer_count;
         } else {
-            compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count;
+            compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
+                                            framegraph->passes[pass_idx].write_count;
             compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
-            if (framegraph->passes[pass_idx].reads_swapchain)
+            if (framegraph->passes[pass_idx].reads_swapchain ||
+                pass_idx == first_swapchain_read)
                 compute_wait_semaphore_count += 1;
-            if (framegraph->passes[pass_idx].writes_swapchain)
+            if (framegraph->passes[pass_idx].writes_swapchain ||
+                pass_idx == last_swapchain_write)
                 compute_signal_semaphore_count += 1;
             ++compute_command_buffer_count;
         }
     }
@@ -850,6 +856,12 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
             signal_semaphores[*signal_count] = rt->semaphore;
             signal_values[*signal_count] = signal_value_base + execution_level + 1;
             *signal_count += 1;
+
+            if (signal_value_base >= 200) {
+                wait_semaphores[*wait_count] = rt->semaphore;
+                wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1;
+                *wait_count += 1;
+            }
         }
         if (pass_idx == first_swapchain_read) {
             wait_semaphores[*wait_count] = swapchain_available;
diff --git a/src/gfx/meson.build b/src/gfx/meson.build
index c70a14a..237b400 100644
--- a/src/gfx/meson.build
+++ b/src/gfx/meson.build
@@ -1,11 +1,13 @@
 gfx_deps = [thread_dep, m_dep]
 
 gfx_lib = library('rtgfx',
     # Project Sources
+    'builtin_objects.h',
     'effect.h',
     'gfx.h',
     'renderer_api.h',
     'render_list.h',
 
+    'builtin_objects.c',
     'gfx_framegraph.c',
     'gfx_main.c',
     'render_list.c',
diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h
index 5577f0c..fb21b2a 100644
--- a/src/gfx/renderer_api.h
+++ b/src/gfx/renderer_api.h
@@ -30,6 +30,7 @@ RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
 RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
 RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
 RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
+RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);
 
 #undef RT_RENDER_BACKEND_HANDLE
 
@@ -126,6 +127,34 @@ typedef struct {
     uint64_t initial_value;
 } rt_gpu_semaphore_info;
 
+typedef enum {
+    RT_BUFFER_TYPE_VERTEX,
+    RT_BUFFER_TYPE_INDEX,
+    RT_BUFFER_TYPE_UNIFORM,
+    RT_BUFFER_TYPE_STORAGE,
+    RT_BUFFER_TYPE_count
+} rt_buffer_type;
+
+typedef enum {
+    /* Create once, never change the data. */
+    RT_BUFFER_USAGE_STATIC,
+
+    /* Update occasionally (after a number of frames) */
+    RT_BUFFER_USAGE_DYNAMIC,
+
+    /* Create, use once and then discard */
+    RT_BUFFER_USAGE_TRANSIENT,
+
+    RT_BUFFER_USAGE_count,
+} rt_buffer_usage;
+
+typedef struct {
+    size_t size;
+    rt_buffer_type type;
+    rt_buffer_usage usage;
+    const void *data;
+} rt_buffer_info;
+
 typedef enum {
     RT_PASS_LOAD_MODE_LOAD,
     RT_PASS_LOAD_MODE_CLEAR,
@@ -200,6 +229,9 @@ typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handl
 typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore);
 typedef rt_gpu_semaphore_handle rt_get_swapchain_available_semaphore_fn(void);
 typedef rt_gpu_semaphore_handle rt_get_render_finished_semaphore_fn(void);
+typedef rt_result
+rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
+typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
 
 typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
                                   const rt_cmd_begin_pass_info *info);
@@ -226,6 +258,8 @@ typedef struct {
     rt_get_gpu_semaphore_value_fn *GetSemaphoreValue;
     rt_get_swapchain_available_semaphore_fn *GetSwapchainAvailableSemaphore;
     rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
+    rt_create_buffers_fn *CreateBuffers;
+    rt_destroy_buffers_fn *DestroyBuffers;
 
     /* Command Buffer Functions */
     rt_cmd_begin_pass_fn *CmdBeginPass;
diff --git a/src/renderer/null/null.c b/src/renderer/null/null.c
index 09339f8..38de3fe 100644
--- a/src/renderer/null/null.c
+++ b/src/renderer/null/null.c
@@ -103,6 +103,19 @@ rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
     return (rt_gpu_semaphore_handle){.index = 2, .version = 1};
 }
 
+rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
+                                            const rt_buffer_info *info,
+                                            rt_buffer_handle *p_buffers) {
+    RT_UNUSED(info);
+    RETURN_HANDLE_ARRAY_STUB(p_buffers, count);
+    return RT_SUCCESS;
+}
+
+void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
+    RT_UNUSED(count);
+    RT_UNUSED(buffers);
+}
+
 void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmd,
                                       const rt_cmd_begin_pass_info *info) {
     RT_UNUSED(cmd);
@@ -119,4 +132,4 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
     RT_UNUSED(cmd);
     RT_UNUSED(target);
     RT_UNUSED(state);
-}
+}
\ No newline at end of file
diff --git a/src/renderer/vk/buffers.c b/src/renderer/vk/buffers.c
new file mode 100644
index 0000000..831d02d
--- /dev/null
+++ b/src/renderer/vk/buffers.c
@@ -0,0 +1,69 @@
+#include "gpu.h"
+#include "gfx/renderer_api.h"
+#include "runtime/config.h"
+#include "runtime/threading.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);
+
+typedef struct rt_buffer_data_s {
+    VkBuffer buffer;
+    VmaAllocation allocation;
+    size_t size;
+    rt_buffer_usage usage;
+    rt_buffer_type type;
+
+    rt_rwlock lock;
+
+    struct rt_buffer_data_s *next_free;
+} rt_buffer_data;
+
+static rt_buffer_data *_buffers;
+static rt_buffer_data *_first_free;
+static rt_mutex *_list_lock;
+
+rt_result InitBufferManagement(void) {
+    size_t n = (size_t)rt_VkMaxBufferCount.i;
+    _buffers = malloc(sizeof(rt_buffer_data) * n);
+    if (!_buffers)
+        return RT_OUT_OF_MEMORY;
+    return RT_SUCCESS;
+}
+
+void ShutdownBufferManagement(void) {
+    for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
+        if (_buffers[i].buffer == VK_NULL_HANDLE)
+            continue;
+        vmaDestroyBuffer(g_gpu.allocator, _buffers[i].buffer, _buffers[i].allocation);
+        rtDestroyRWLock(&_buffers[i].lock);
+        memset(&_buffers[i], 0, sizeof(_buffers[i]));
+    }
+    free(_buffers);
+    _first_free = NULL;
+    rtDestroyMutex(_list_lock);
+}
+
+void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);
+
+rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
+                                            const rt_buffer_info *info,
+                                            rt_buffer_handle *p_buffers) {
+    for (uint32_t i = 0; i < count; ++i) {
+        rtLockMutex(_list_lock);
+        rt_buffer_data *slot = _first_free;
+        if (!slot) {
+            rtUnlockMutex(_list_lock);
+            if (i > 0)
+                rtRenDestroyBuffers(i, p_buffers);
+            return RT_OUT_OF_MEMORY;
+        }
+        rtUnlockMutex(_list_lock);
+    }
+    return RT_SUCCESS;
+}
+
+void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
+
+}
diff --git a/src/renderer/vk/command_buffers.c b/src/renderer/vk/command_buffers.c
index 4dfeec1..00721d8 100644
--- a/src/renderer/vk/command_buffers.c
+++ b/src/renderer/vk/command_buffers.c
@@ -233,6 +233,8 @@ rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
     return result;
 }
 
+#define RT_VK_LOG_SUBMIT_INFO 1
+
 rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
                                                    const rt_submit_command_buffers_info *info) {
 
@@ -313,6 +315,35 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
         vkEndCommandBuffer(command_buffers[i].commandBuffer);
     }
 
+#if RT_VK_LOG_SUBMIT_INFO
+    {
+        const char *queue_str = "";
+        if (queue == RT_GRAPHICS_QUEUE)
+            queue_str = "GRAPHICS";
+        else if (queue == RT_COMPUTE_QUEUE)
+            queue_str = "COMPUTE";
+        else if (queue == RT_TRANSFER_QUEUE)
+            queue_str = "TRANSFER";
+        rtLog("vk", "Submit Info");
+        rtLog("vk", "Queue: %s", queue_str);
+        rtLog("vk", "Command Buffers: %u", count);
+        rtLog("vk", " - TODO: More Info");
+        rtLog("vk", "Wait Semaphores:");
+        for (uint32_t i = 0; i < wait_count; ++i) {
+            rtLog("vk", " - %u:%u Value %u", info->wait_semaphores[i].version,
+                  info->wait_semaphores[i].index, info->wait_values[i]);
+        }
+        rtLog("vk", "Signal Semaphores:");
+        for (uint32_t i = 0; i < signal_count; ++i) {
+            rtLog("vk",
+                  " - %u:%u Value %u",
+                  info->signal_semaphores[i].version,
+                  info->signal_semaphores[i].index,
+                  info->signal_values[i]);
+        }
+    }
+#endif
+
     VkSubmitInfo2 submit_info = {
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
         .waitSemaphoreInfoCount = wait_count,
diff --git a/src/renderer/vk/commands.c b/src/renderer/vk/commands.c
index 5873876..493146d 100644
--- a/src/renderer/vk/commands.c
+++ b/src/renderer/vk/commands.c
@@ -161,37 +161,120 @@ void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdbuf_handle) {
 #endif
 }
 
-void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
-                                                   rt_render_target_handle render_target,
-                                                   rt_render_target_state new_state) {
-    GET_CMDBUF(cmdbuf, cmdbuf_handle)
-    uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
-    if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
-        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
+/* Non-layout transition barrier */
+static void ExecuteRenderTargetBarrier(rt_render_target *rt,
+                                       uint32_t image_index,
+                                       VkCommandBuffer cmdbuf) {
+    /* Determine the current layout */
+    VkImageLayout layout;
+    switch (rt->states[image_index]) {
+    case RT_RENDER_TARGET_STATE_ATTACHMENT:
+        layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
+        break;
+    case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
+    case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
+        layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        break;
+    default:
+        layout = VK_IMAGE_LAYOUT_UNDEFINED;
+        break;
     }
-    rt_render_target *rt = rtGetRenderTarget(render_target);
-    if (!rt) {
-        rtLog("vk", "Tried to transition invalid render target");
-        return;
+#ifdef RT_DEBUG
+    VkDebugUtilsLabelEXT debug_label = {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
+        .pLabelName = "Render Target Barrier",
+        .color = {.13f, .54f, .13f, .75f},
+    };
+    vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
+#endif
+
+    VkImageAspectFlags aspect_mask =
+        (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+            ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
+            : VK_IMAGE_ASPECT_COLOR_BIT;
+
+    /* Determine access flags */
+    VkPipelineStageFlags2 src_stage = 0;
+    VkPipelineStageFlags2 dst_stage = 0;
+    VkAccessFlags2 src_access = 0;
+    VkAccessFlags2 dst_access = 0;
+    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        src_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
+        dst_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                  VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
+                  VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
+        src_stage =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
+                  VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
+                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
+        dst_stage =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
+                  VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
+                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
+    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
+        src_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
+        dst_access = VK_ACCESS_2_SHADER_READ_BIT;
+        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+        dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
     }
 
-    if (rt->states[image_index] == new_state)
-        return;
+    VkImageMemoryBarrier2 image_barrier = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .srcStageMask = src_stage,
+        .srcAccessMask = src_access,
+        .dstStageMask = dst_stage,
+        .dstAccessMask = dst_access,
+        .oldLayout = layout,
+        .newLayout = layout,
+        .image = rt->image[image_index],
+        /* clang-format off */
+        .subresourceRange = {
+            .aspectMask = aspect_mask,
+            .baseArrayLayer = 0,
+            .baseMipLevel = 0,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+        /* clang-format on */
+    };
+
+    VkDependencyInfo dep_info = {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = &image_barrier,
+        .imageMemoryBarrierCount = 1,
+    };
+    vkCmdPipelineBarrier2(cmdbuf, &dep_info);
+
+#ifdef RT_DEBUG
+    vkCmdEndDebugUtilsLabelEXT(cmdbuf);
+#endif
+}
+
+static void DoLayoutTransition(rt_render_target *rt,
+                               uint32_t image_index,
+                               rt_render_target_state new_state,
+                               VkCommandBuffer cmdbuf) {
     /* Determine old and new layout */
     VkImageLayout old_layout;
     switch (rt->states[image_index]) {
     case RT_RENDER_TARGET_STATE_ATTACHMENT:
-        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
-            old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-        } else {
-            old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-        }
+        old_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
         break;
     case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
     case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
-        old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        old_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
         break;
     default:
         old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -200,15 +283,11 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
     VkImageLayout new_layout;
     switch (new_state) {
     case RT_RENDER_TARGET_STATE_ATTACHMENT:
-        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
-            new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-        } else {
-            new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-        }
+        new_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
         break;
     case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
     case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
-        new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        new_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
         break;
     default:
         new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -227,23 +306,59 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
         (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
             ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
             : VK_IMAGE_ASPECT_COLOR_BIT;
 
+    VkPipelineStageFlags2 src_stage = 0;
+    VkPipelineStageFlags2 dst_stage = 0;
+    /* Determine access flags */
+    VkAccessFlags2 src_access = 0;
+    VkAccessFlags2 dst_access = 0;
+    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        src_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
+        src_stage =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
+                  VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
+                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
+    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
+        src_access = VK_ACCESS_2_SHADER_WRITE_BIT;
+        src_stage = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
+    }
+
+    if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        dst_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
+        dst_stage =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
+                  VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
+                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
+    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
+        dst_access = VK_ACCESS_2_SHADER_READ_BIT;
+        dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+    }
+
     VkImageMemoryBarrier2 image_barrier = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
-        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
-        .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
-        .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
-        .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT | VK_ACCESS_2_MEMORY_READ_BIT,
+        .srcStageMask = src_stage,
+        .srcAccessMask = src_access,
+        .dstStageMask = dst_stage,
+        .dstAccessMask = dst_access,
         .oldLayout = old_layout,
         .newLayout = new_layout,
         .image = rt->image[image_index],
         /* clang-format off */
-        .subresourceRange = {
-                .aspectMask = aspect_mask,
-                .baseArrayLayer = 0,
-                .baseMipLevel = 0,
-                .layerCount = 1,
-                .levelCount = 1,
-        },
+        .subresourceRange = {
+            .aspectMask = aspect_mask,
+            .baseArrayLayer = 0,
+            .baseMipLevel = 0,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
         /* clang-format on */
     };
 
@@ -260,3 +375,24 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
 
     rt->states[image_index] = new_state;
 }
+
+void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
+                                                   rt_render_target_handle render_target,
+                                                   rt_render_target_state new_state) {
+    GET_CMDBUF(cmdbuf, cmdbuf_handle)
+    uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
+    if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
+        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
+    }
+
+    rt_render_target *rt = rtGetRenderTarget(render_target);
+    if (!rt) {
+        rtLog("vk", "Tried to transition invalid render target");
+        return;
+    }
+
+    if (rt->states[image_index] != new_state)
+        DoLayoutTransition(rt, image_index, new_state, cmdbuf);
+    else
+        ExecuteRenderTargetBarrier(rt, image_index, cmdbuf);
+}
diff --git a/src/renderer/vk/init.c b/src/renderer/vk/init.c
index b5d0cd9..d6f6a63 100644
--- a/src/renderer/vk/init.c
+++ b/src/renderer/vk/init.c
@@ -89,7 +89,8 @@ DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
     else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
         severity_str = "ERROR";
     rtLog("vk", "[%s] %s", severity_str, callbackData->pMessage);
-    RT_DEBUGBREAK;
+    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
+        RT_DEBUGBREAK;
     return VK_FALSE;
 }
 
@@ -645,6 +646,8 @@ extern rt_result InitCommandBufferManagement(void);
 extern void ShutdownCommandBufferManagement(void);
 extern rt_result InitializeSempahoreManagement(void);
 extern void ShutdownSemaphoreManagement(void);
+extern rt_result InitBufferManagement(void);
+extern void ShutdownBufferManagement(void);
 
 rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
     rtLog("vk", "Init");
@@ -691,6 +694,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
     if (res != RT_SUCCESS)
         return res;
     res = InitCommandBufferManagement();
+    if (res != RT_SUCCESS)
+        return res;
+    res = InitBufferManagement();
     if (res != RT_SUCCESS)
         return res;
     res = rtCreateSwapchain();
@@ -705,6 +711,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
     rtLog("vk", "Shutdown");
     vkDeviceWaitIdle(g_gpu.device);
     rtDestroySwapchain();
+    ShutdownBufferManagement();
     ShutdownCommandBufferManagement();
     ShutdownSemaphoreManagement();
     ShutdownRenderTargetManagement();
diff --git a/src/renderer/vk/meson.build b/src/renderer/vk/meson.build
index eb2d6e6..f830922 100644
--- a/src/renderer/vk/meson.build
+++ b/src/renderer/vk/meson.build
@@ -16,6 +16,7 @@ if vk_dep.found()
         'render_targets.h',
         'swapchain.h',
 
+        'buffers.c',
         'command_buffers.c',
        'commands.c',
        'frame.c',
diff --git a/src/renderer/vk/pch/vk_pch.h b/src/renderer/vk/pch/vk_pch.h
index b7aa648..fe7ee3a 100644
--- a/src/renderer/vk/pch/vk_pch.h
+++ b/src/renderer/vk/pch/vk_pch.h
@@ -9,6 +9,9 @@
 #include
 #endif
 
+#include
+#include
+
 /* GFX */
 #include "gfx/gfx.h"
 
diff --git a/src/renderer/vk/render_targets.c b/src/renderer/vk/render_targets.c
index 88d9bc5..ec57e36 100644
--- a/src/renderer/vk/render_targets.c
+++ b/src/renderer/vk/render_targets.c
@@ -42,7 +42,9 @@ static bool CreateImageAndView(VkExtent2D extent,
                                VkImageAspectFlagBits aspect,
                                VkImage *p_image,
                                VmaAllocation *p_allocation,
-                               VkImageView *p_view) {
+                               VkImageView *p_view,
+                               const char *rt_name,
+                               uint32_t image_index) {
 
     uint32_t queue_families[3];
     uint32_t distinct_queue_families = 1;
@@ -140,6 +142,25 @@ static bool CreateImageAndView(VkExtent2D extent,
         return false;
     }
 
+#ifdef RT_DEBUG
+    char name[260];
+    rtSPrint(name, 260, "%s (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
+    VkDebugUtilsObjectNameInfoEXT name_info = {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
+        .objectHandle = (uint64_t)image,
+        .pObjectName = name,
+        .objectType = VK_OBJECT_TYPE_IMAGE};
+    vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
+
+    rtSPrint(name, 260, "%s [view] (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
+    name_info =
+        (VkDebugUtilsObjectNameInfoEXT){.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
+                                        .objectHandle = (uint64_t)view,
+                                        .pObjectName = name,
+                                        .objectType = VK_OBJECT_TYPE_IMAGE_VIEW};
+    vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
+#endif
+
     *p_image = image;
     *p_allocation = allocation;
     *p_view = view;
@@ -201,6 +222,8 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
      */
     rtUnlockWrite(&_lock);
 
+    const char *name = rtResolveConstRelptr(&info->name);
+
     slot->render_target.match_swapchain = 0;
     slot->render_target.image_count = g_swapchain.image_count;
     for (unsigned int i = 0; i < g_swapchain.image_count; ++i) {
@@ -243,7 +266,9 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
                                slot->render_target.aspect,
                                &slot->render_target.image[i],
                                &slot->render_target.allocation[i],
-                               &slot->render_target.view[i])) {
+                               &slot->render_target.view[i],
+                               name,
+                               i)) {
                 slot->render_target.image_count = i;
                 DestroyRenderTarget(slot);
                 goto out;
@@ -332,7 +357,9 @@ void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, V
                                        render_target->aspect,
                                        &render_target->image[j],
                                        &render_target->allocation[j],
-                                       &render_target->view[j])) {
+                                       &render_target->view[j],
+                                       NULL,
+                                       j)) {
                         render_target->image_count = j;
                         DestroyRenderTarget(&_render_targets[i]);
                         rtReportError("VK", "Failed to recreate swapchain-matching render target");
@@ -349,7 +376,9 @@ void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, V
                                        render_target->aspect,
                                        &render_target->image[j],
                                        &render_target->allocation[j],
-                                       &render_target->view[j])) {
+                                       &render_target->view[j],
+                                       NULL,
+                                       j)) {
                         render_target->image_count = j;
                         DestroyRenderTarget(&_render_targets[i]);
                         rtReportError("VK", "Failed to create additional render target images");
diff --git a/src/renderer/vk/swapchain.c b/src/renderer/vk/swapchain.c
index 6a36632..e76147b 100644
--- a/src/renderer/vk/swapchain.c
+++ b/src/renderer/vk/swapchain.c
@@ -28,7 +28,9 @@ typedef struct {
 static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
     rt_device_swapchain_parameters params;
 
-    /* determine presentation mode. FIFO should always be available */
+    /* determine presentation mode. FIFO should always be available.
+     * TODO: If vsync is enabled, we should always choose FIFO.
+     */
     params.present_mode = VK_PRESENT_MODE_FIFO_KHR;
     if (r_VkPreferMailboxMode.i) {
         VkPresentModeKHR modes[6];
@@ -43,6 +45,7 @@ static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
     /* Determine surface format */
     VkSurfaceFormatKHR formats[64];
     uint32_t format_count = 64;
+    vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, NULL);
     vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, formats);
     params.surface_format = formats[0];
     for (uint32_t i = 0; i < format_count; ++i) {
diff --git a/src/runtime/error_report.c b/src/runtime/error_report.c
index 19d00b9..96db41e 100644
--- a/src/runtime/error_report.c
+++ b/src/runtime/error_report.c
@@ -49,7 +49,7 @@ static bool DisplayErrorBox(const char *text) {
 
 static void LogOut(const char *text) {
 #ifdef _WIN32
-    WCHAR msg[256];
+    WCHAR msg[512];
     rtUTF8ToWStr(text, msg, RT_ARRAY_COUNT(msg));
     OutputDebugStringW(msg);
 #endif
@@ -57,7 +57,7 @@ static void LogOut(const char *text) {
 }
 
 RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) {
-    char buf[256];
+    char buf[512];
     int at = rtSPrint(buf, RT_ARRAY_COUNT(buf) - 1, "[%s] ", subsystem);
 
     va_list ap;
@@ -73,7 +73,7 @@ RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) {
 }
 
 RT_DLLEXPORT void rtLog(const char *subsystem, const char *fmt, ...) {
-    char buf[256];
+    char buf[512];
     int at = rtSPrint(buf, RT_ARRAY_COUNT(buf), "[%s] ", subsystem);
 
     va_list ap;
diff --git a/src/runtime/threading_cond.c b/src/runtime/threading_cond.c
index 49b3f00..26c2d72 100644
--- a/src/runtime/threading_cond.c
+++ b/src/runtime/threading_cond.c
@@ -10,7 +10,7 @@ struct rt_condition_var_s {
     ptrdiff_t next_reusable;
 };
 
-#define MAX_CONDS 1024
+#define MAX_CONDS 4096
 rt_condition_var _conds[MAX_CONDS];
 static ptrdiff_t _first_reusable = MAX_CONDS;
 static ptrdiff_t _next = 0;
@@ -75,7 +75,7 @@ struct rt_condition_var_s {
     ptrdiff_t next_reusable;
 };
 
-#define MAX_CONDS 1024
+#define MAX_CONDS 4096
 rt_condition_var _conds[MAX_CONDS];
 static ptrdiff_t _first_reusable = MAX_CONDS;
 static ptrdiff_t _next = 0;
diff --git a/src/runtime/threading_mutex.c b/src/runtime/threading_mutex.c
index 3e9d430..4871487 100644
--- a/src/runtime/threading_mutex.c
+++ b/src/runtime/threading_mutex.c
@@ -11,7 +11,7 @@ struct rt_mutex_s {
     DWORD owner;
 };
 
-#define MAX_MUTEX 1024
+#define MAX_MUTEX 4096
 static rt_mutex _mutex[MAX_MUTEX];
 static ptrdiff_t _first_reusable = MAX_MUTEX;
 static ptrdiff_t _next = 0;
@@ -106,7 +106,7 @@ struct rt_mutex_s {
     ptrdiff_t next_reusable;
 };
 
-#define MAX_MUTEX 1024
+#define MAX_MUTEX 4096
 static rt_mutex _mutex[MAX_MUTEX];
 static ptrdiff_t _first_reusable = MAX_MUTEX;
 static ptrdiff_t _next = 0;
diff --git a/vk_layer_settings.txt b/vk_layer_settings.txt
new file mode 100644
index 0000000..938e454
--- /dev/null
+++ b/vk_layer_settings.txt
@@ -0,0 +1,31 @@
+# The main, heavy-duty validation checks. This may be valuable early in the
+# development cycle to reduce validation output while correcting
+# parameter/object usage errors.
+khronos_validation.validate_core = true
+
+# Enable synchronization validation during command buffer recording. This
+# feature reports resource access conflicts due to missing or incorrect
+# synchronization operations between actions (Draw, Copy, Dispatch, Blit)
+# reading or writing the same regions of memory.
+khronos_validation.validate_sync = true
+
+# Thread checks. In order to not degrade performance, it might be best to run
+# your program with thread-checking disabled most of the time, enabling it
+# occasionally for a quick sanity check or when debugging difficult application
+# behaviors.
+khronos_validation.thread_safety = true
+
+# Specifies what action is to be taken when a layer reports information
+khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG
+
+# Comma-delineated list of options specifying the types of messages to be reported
+khronos_validation.report_flags = debug,error,perf,info,warn
+
+# Enable limiting of duplicate messages.
+khronos_validation.enable_message_limit = true
+
+# Maximum number of times any single validation message should be reported.
+khronos_validation.duplicate_message_limit = 3
+
+# Enable once the implementation is more mature
+khronos_validation.validate_best_practices = false