From 3bc192b281a1d3cfd29fd5ea2a555259f7bf333c Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Mon, 25 Mar 2024 17:55:03 +0100 Subject: [PATCH] dump state this will be the basis of the framegraph rewrite, because the current state is fucked --- .../simple_vulkan_synchronization/LICENSE.md | 19 + .../simple_vulkan_synchronization/README.md | 153 ++ .../test/README.md | 26 + .../test/tests.c | 357 +++++ .../thsvs_simpler_vulkan_synchronization.h | 1397 +++++++++++++++++ src/game/main.c | 15 + src/gfx/gfx_framegraph.c | 22 +- src/gfx/gfx_main.c | 18 +- src/gfx/renderer_api.h | 5 + src/renderer/null/null.c | 10 + src/renderer/vk/buffers.c | 174 +- src/renderer/vk/command_buffers.c | 12 +- src/renderer/vk/command_buffers.h | 3 +- src/renderer/vk/commands.c | 142 +- src/renderer/vk/frame.c | 6 +- src/renderer/vk/gpu.h | 5 + src/renderer/vk/helper.c | 14 + src/renderer/vk/init.c | 10 + src/renderer/vk/meson.build | 4 + src/renderer/vk/resources.h | 80 + src/renderer/vk/simple_sync_impl.cpp | 6 + src/renderer/vk/transfers.c | 263 ++++ src/renderer/vk/transfers.h | 16 + 23 files changed, 2717 insertions(+), 40 deletions(-) create mode 100644 contrib/simple_vulkan_synchronization/LICENSE.md create mode 100644 contrib/simple_vulkan_synchronization/README.md create mode 100644 contrib/simple_vulkan_synchronization/test/README.md create mode 100644 contrib/simple_vulkan_synchronization/test/tests.c create mode 100644 contrib/simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h create mode 100644 src/renderer/vk/resources.h create mode 100644 src/renderer/vk/simple_sync_impl.cpp create mode 100644 src/renderer/vk/transfers.c create mode 100644 src/renderer/vk/transfers.h diff --git a/contrib/simple_vulkan_synchronization/LICENSE.md b/contrib/simple_vulkan_synchronization/LICENSE.md new file mode 100644 index 0000000..0f321c2 --- /dev/null +++ b/contrib/simple_vulkan_synchronization/LICENSE.md @@ -0,0 +1,19 @@ +Copyright (c) 2017 Tobias Hector + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/contrib/simple_vulkan_synchronization/README.md b/contrib/simple_vulkan_synchronization/README.md new file mode 100644 index 0000000..2a26c2e --- /dev/null +++ b/contrib/simple_vulkan_synchronization/README.md @@ -0,0 +1,153 @@ +# Simplified Vulkan Synchronization + +In an effort to make Vulkan synchronization more accessible, I created this +stb-inspired single-header library in order to somewhat simplify the core +synchronization mechanisms in Vulkan - pipeline barriers and events. 
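+
+For a sense of what that looks like in practice, here is a minimal sketch
+(the types and functions are declared in thsvs_simpler_vulkan_synchronization.h;
+`cmd` and `image` are assumed to be an existing VkCommandBuffer and VkImage):
+
+```c
+// Transition an image from "written by a transfer" to "sampled in a fragment
+// shader"; the library derives the stages, access masks and layouts.
+ThsvsAccessType prevAccess = THSVS_ACCESS_TRANSFER_WRITE;
+ThsvsAccessType nextAccess =
+    THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
+
+ThsvsImageBarrier barrier = {0};
+barrier.prevAccessCount     = 1;
+barrier.pPrevAccesses       = &prevAccess;
+barrier.nextAccessCount     = 1;
+barrier.pNextAccesses       = &nextAccess;
+barrier.prevLayout          = THSVS_IMAGE_LAYOUT_OPTIMAL;
+barrier.nextLayout          = THSVS_IMAGE_LAYOUT_OPTIMAL;
+barrier.discardContents     = VK_FALSE;
+barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+barrier.image               = image;
+barrier.subresourceRange    =
+    (VkImageSubresourceRange){VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+
+thsvsCmdPipelineBarrier(cmd, NULL, 0, NULL, 1, &barrier);
+```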
+ +Rather than the complex maze of enums and bitflags in Vulkan - many +combinations of which are invalid or nonsensical - this library collapses +this to a much shorter list of 40 distinct usage types, and a couple of +options for handling image layouts. + +Use of other synchonization mechanisms such as semaphores, fences and render +passes are not addressed in this API at present. + +## Usage + +#define the symbol THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION in +*one* C/C++ file before the #include of the header; the implementation +will be generated in that file. + +## Version + +alpha.9 + +Alpha.9 adds the thsvsGetAccessInfo function to translate access types into a thsvsVkAccessInfo. + +## Version History + +alpha.8 + +Alpha.8 adds a host preinitialization state for linear images, as well as a number of new access sets for extensions released since the last update. + +alpha.7 + +Alpha.7 incorporates a number of fixes from @gwihlidal, and fixes +handling of pipeline stages in the presence of multiple access types or +barriers in light of other recent changes. + +alpha.6 + +Alpha.6 fixes a typo (VK_ACCESS_TYPE_MEMORY_READ|WRITE_BIT should have been VK_ACCESS_MEMORY_READ|WRITE_BIT), and sets the pipeline stage src and dst flag bits to VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT and VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT during initialization, not 0 as per alpha.5 + +alpha.5 + +Alpha.5 now correctly zeroes out the pipeline stage flags before trying to incrementally set bits on them... common theme here, whoops. + +alpha.4 + +Alpha.4 now correctly zeroes out the access types before trying to incrementally set bits on them (!) + +alpha.3 + +Alpha.3 changes the following: + +Uniform and vertex buffer access in one enum, matching D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER: + - THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER_OR_VERTEX_BUFFER + +Color read *and* write access, matching D3D12_RESOURCE_STATE_RENDER_TARGET: + - THSVS_ACCESS_COLOR_ATTACHMENT_READ_WRITE + +Also the "THSVS_ACCESS_\*\_SHADER_READ_SAMPLED_IMAGE" enums have been renamed to the form "THSVS_ACCESS_\*\_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER" + +alpha.2 + +Alpha.2 adds four new resource states for "ANY SHADER ACCESS": + - THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER + - THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE + - THSVS_ACCESS_ANY_SHADER_READ_OTHER + - THSVS_ACCESS_ANY_SHADER_WRITE + +alpha.1 + +Alpha.1 adds three new resource states: + - THSVS_ACCESS_GENERAL (Any access on the device) + - THSVS_ACCESS_DEPTH_ATTACHMENT_WRITE_STENCIL_READ_ONLY (Write access to only the depth aspect of a depth/stencil attachment) + - THSVS_ACCESS_STENCIL_ATTACHMENT_WRITE_DEPTH_READ_ONLY (Write access to only the stencil aspect of a depth/stencil attachment) + +It also fixes a couple of typos, and adds clarification as to when extensions need to be enabled to use a feature. + +alpha.0 + +This is the very first public release of this library; future revisions +of this API may change the API in an incompatible manner as feedback is +received. +Once the version becomes stable, incompatible changes will only be made +to major revisions of the API - minor revisions will only contain +bugfixes or minor additions. + +## Memory Allocation + +The thsvsCmdPipelineBarrier and thsvsCmdWaitEvents commands allocate +temporary storage for the Vulkan barrier equivalents in order to pass them +to the respective Vulkan commands. 
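+
+If stack allocation is not suitable for your application, the macros described
+below can be redirected to any other allocator before the implementation is
+compiled - a sketch using the C standard library heap, for example:
+
+```c
+#include <stdlib.h>
+
+#define THSVS_TEMP_ALLOC(size) (malloc(size))
+#define THSVS_TEMP_FREE(x)     (free(x))
+#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
+#include "thsvs_simpler_vulkan_synchronization.h"
+```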
+ +These use the `THSVS_TEMP_ALLOC(size)` and `THSVS_TEMP_FREE(x)` macros, +which are by default set to alloca(size) and ((void)(x)), respectively. +If you don't want to use stack space or would rather use your own +allocation strategy, these can be overridden by defining these macros +in before #include-ing the header file with +THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION defined. + +I'd rather avoid the need for these allocations in what are likely to be +high-traffic commands, but currently just want to ship something - may +revisit this at a future date based on feedback. + +## Expressiveness Compared to Raw Vulkan + +Despite the fact that this API is fairly simple, it expresses 99% of +what you'd actually ever want to do in practice. +Adding the missing expressiveness would result in increased complexity +which didn't seem worth the tradeoff - however I would consider adding +something for them in future if it becomes an issue. + +Here's a list of known things you can't express: + +* Execution only dependencies cannot be expressed. + These are occasionally useful in conjunction with semaphores, or when + trying to be clever with scheduling - but their usage is both limited + and fairly tricky to get right anyway. +* Depth/Stencil Input Attachments can be read in a shader using either + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL or + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL - this library + *always* uses VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL. + It's possible (though highly unlikely) when aliasing images that this + results in unnecessary transitions. + +## Error Checks + +By default, as with the Vulkan API, this library does NOT check for +errors. +However, a number of optional error checks (`THSVS_ERROR_CHECK_*`) can be +enabled by uncommenting the relevant #defines. +Currently, error checks simply assert at the point a failure is detected +and do not output an error message. +I certainly do not claim they capture *all* possible errors, but they +capture what should be some of the more common ones. +Use of the Vulkan Validation Layers in tandem with this library is +strongly recommended: + https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers + +## Issues + +This header was clean of warnings using -Wall as of time of publishing +on both gcc 4.8.4 and clang 3.5, using the c99 standard. + +There's a potential pitfall in thsvsCmdPipelineBarrier and thsvsCmdWaitEvents +where alloca is used for temporary allocations. See +[Memory Allocation](#memory-allocation) for more information. + +Testing of this library is so far extremely limited with no immediate +plans to add to that - so there's bound to be some amount of bugs. +Please raise these issues on the repo issue tracker, or provide a fix +via a pull request yourself if you're so inclined. diff --git a/contrib/simple_vulkan_synchronization/test/README.md b/contrib/simple_vulkan_synchronization/test/README.md new file mode 100644 index 0000000..6962237 --- /dev/null +++ b/contrib/simple_vulkan_synchronization/test/README.md @@ -0,0 +1,26 @@ +# Tests + +`tests.c` defines a number of unit tests to test that various scenarios +produce the desired output. +Tests are based on the common synchronization examples on the Vulkan-Docs +wiki: https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples. 
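+
+Each test follows the same pattern: feed a barrier description through the
+library's mapping functions and compare the result against hand-written
+expectations. Roughly (a sketch - see tests.c for the full harness):
+
+```c
+ThsvsAccessType prev = THSVS_ACCESS_COMPUTE_SHADER_WRITE;
+ThsvsAccessType next = THSVS_ACCESS_INDEX_BUFFER;
+ThsvsGlobalBarrier barrier = {1, &prev, 1, &next};
+
+VkMemoryBarrier vkBarrier = {0};
+VkPipelineStageFlags srcStages = 0, dstStages = 0;
+thsvsGetVulkanMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier);
+
+// Expected: srcStages == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+//           dstStages == VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+//           vkBarrier.srcAccessMask == VK_ACCESS_SHADER_WRITE_BIT,
+//           vkBarrier.dstAccessMask == VK_ACCESS_INDEX_READ_BIT.
+```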
+ +## Building + +On a unix based system these tests can be built using: + +`gcc -o tests tests.c -lvulkan` + +## Running + +Running is straightforward: + +`./tests` + +The executable will write out the tests that are run, whether they pass or +fail, and what caused them to fail if they did. + +## Adding tests + +If you'd like to add a test, just define a new test in main() as per those +that already exist. diff --git a/contrib/simple_vulkan_synchronization/test/tests.c b/contrib/simple_vulkan_synchronization/test/tests.c new file mode 100644 index 0000000..f897244 --- /dev/null +++ b/contrib/simple_vulkan_synchronization/test/tests.c @@ -0,0 +1,357 @@ +// Copyright (c) 2017-2019 Tobias Hector + +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
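+
+// Each test below feeds a barrier description (previous/next ThsvsAccessTypes)
+// through thsvsGetVulkanMemoryBarrier or thsvsGetVulkanImageMemoryBarrier and
+// compares the resulting pipeline stages, access masks and image layouts
+// against the values expected from the Vulkan-Docs synchronization examples.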
+ +#include +#include + +#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION +#include "../thsvs_simpler_vulkan_synchronization.h" + +void global_barrier_test_array(const char* testName, + unsigned int numPrevAccesses, + ThsvsAccessType* prevAccesses, + unsigned int numNextAccesses, + ThsvsAccessType* nextAccesses, + VkPipelineStageFlags expectedSrcStageMask, + VkPipelineStageFlags expectedDstStageMask, + VkAccessFlags expectedSrcAccessMask, + VkAccessFlags expectedDstAccessMask) +{ + ThsvsGlobalBarrier barrier = {numPrevAccesses, prevAccesses, numNextAccesses, nextAccesses}; + + VkMemoryBarrier vkBarrier = { 0 }; + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + unsigned int testPassed = 1; + + thsvsGetVulkanMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier); + + printf("Test: %s\n", testName); + + if (srcStages != expectedSrcStageMask) + { + printf("\tUnexpected source stage %0#10X\n", srcStages); + testPassed = 0; + } + + if (dstStages != expectedDstStageMask) + { + printf("\tUnexpected destination stage %0#10X\n", dstStages); + testPassed = 0; + } + + if (vkBarrier.srcAccessMask != expectedSrcAccessMask) + { + printf("\tUnexpected source access mask %0#10X\n", vkBarrier.srcAccessMask); + testPassed = 0; + } + + if (vkBarrier.dstAccessMask != expectedDstAccessMask) + { + printf("\tUnexpected destination access mask %0#10X\n", vkBarrier.dstAccessMask); + testPassed = 0; + } + + if (testPassed == 1) + printf("\tPASSED\n"); + else + printf("\tFAILED\n"); +} + +void global_barrier_test(const char* testName, + ThsvsAccessType prevAccess, + ThsvsAccessType nextAccess, + VkPipelineStageFlags expectedSrcStageMask, + VkPipelineStageFlags expectedDstStageMask, + VkAccessFlags expectedSrcAccessMask, + VkAccessFlags expectedDstAccessMask) +{ + global_barrier_test_array(testName, 1, &prevAccess, 1, &nextAccess, expectedSrcStageMask, expectedDstStageMask, expectedSrcAccessMask, expectedDstAccessMask); +} + +void image_barrier_test_array(const char* testName, + unsigned int numPrevAccesses, + ThsvsAccessType* prevAccesses, + unsigned int numNextAccesses, + ThsvsAccessType* nextAccesses, + VkPipelineStageFlags expectedSrcStageMask, + VkPipelineStageFlags expectedDstStageMask, + VkAccessFlags expectedSrcAccessMask, + VkAccessFlags expectedDstAccessMask, + VkImageLayout expectedOldLayout, + VkImageLayout expectedNewLayout) +{ + ThsvsImageBarrier barrier = {numPrevAccesses, prevAccesses, numNextAccesses, nextAccesses}; + + VkImageMemoryBarrier vkBarrier = { 0 }; + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + unsigned int testPassed = 1; + + thsvsGetVulkanImageMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier); + + printf("Test: %s\n", testName); + + if (srcStages != expectedSrcStageMask) + { + printf("\tUnexpected source stage %0#10X\n", srcStages); + testPassed = 0; + } + + if (dstStages != expectedDstStageMask) + { + printf("\tUnexpected destination stage %0#10X\n", dstStages); + testPassed = 0; + } + + if (vkBarrier.srcAccessMask != expectedSrcAccessMask) + { + printf("\tUnexpected source access mask %0#10X\n", vkBarrier.srcAccessMask); + testPassed = 0; + } + + if (vkBarrier.dstAccessMask != expectedDstAccessMask) + { + printf("\tUnexpected destination access mask %0#10X\n", vkBarrier.dstAccessMask); + testPassed = 0; + } + + if (vkBarrier.oldLayout != expectedOldLayout) + { + printf("\tUnexpected old layout %d\n", vkBarrier.oldLayout); + testPassed = 0; + } + + if (vkBarrier.newLayout != expectedNewLayout) + { + 
printf("\tUnexpected new layout %d\n", vkBarrier.newLayout); + testPassed = 0; + } + + if (testPassed == 1) + printf("\tPASSED\n"); + else + printf("\tFAILED\n"); +} + +void image_barrier_test(const char* testName, + ThsvsAccessType prevAccess, + ThsvsAccessType nextAccess, + VkPipelineStageFlags expectedSrcStageMask, + VkPipelineStageFlags expectedDstStageMask, + VkAccessFlags expectedSrcAccessMask, + VkAccessFlags expectedDstAccessMask, + VkImageLayout expectedOldLayout, + VkImageLayout expectedNewLayout) +{ + image_barrier_test_array(testName, 1, &prevAccess, 1, &nextAccess, expectedSrcStageMask, expectedDstStageMask, expectedSrcAccessMask, expectedDstAccessMask, expectedOldLayout, expectedNewLayout); +} + +int main(int argc, char* argv[]) +{ + global_barrier_test("Compute write to storage buffer/image, Compute read from storage buffer/image", + THSVS_ACCESS_COMPUTE_SHADER_WRITE, + THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT); + + global_barrier_test("Compute read from storage buffer, Compute write from storage buffer", + THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, + THSVS_ACCESS_COMPUTE_SHADER_WRITE, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 0); + + global_barrier_test("Compute write to storage buffer, Graphics read as index buffer", + THSVS_ACCESS_COMPUTE_SHADER_WRITE, + THSVS_ACCESS_INDEX_BUFFER, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_INDEX_READ_BIT); + + { + ThsvsAccessType prevAccesses[] = {THSVS_ACCESS_COMPUTE_SHADER_WRITE}; + ThsvsAccessType nextAccesses[] = {THSVS_ACCESS_INDEX_BUFFER, THSVS_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER}; + global_barrier_test_array("Compute write to storage buffer, Graphics read as index buffer & Compute read as uniform buffer", + 1, prevAccesses, + 2, nextAccesses, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT); + } + + global_barrier_test("Compute write to storage buffer, Graphics read as indirect buffer", + THSVS_ACCESS_COMPUTE_SHADER_WRITE, + THSVS_ACCESS_INDIRECT_BUFFER, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT); + + image_barrier_test("Compute write to storage image, Graphics fragment read as sampled image", + THSVS_ACCESS_COMPUTE_SHADER_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + { + ThsvsAccessType prevAccesses[] = {THSVS_ACCESS_COMPUTE_SHADER_WRITE}; + ThsvsAccessType nextAccesses[] = {THSVS_ACCESS_INDIRECT_BUFFER, THSVS_ACCESS_FRAGMENT_SHADER_READ_UNIFORM_BUFFER}; + global_barrier_test_array("Compute write to storage texel buffer, Graphics read as indirect buffer & fragment read as uniform buffer", + 1, prevAccesses, + 2, nextAccesses, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT); + } + + 
image_barrier_test("Graphics write to color attachment, Compute read from sampled image", + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to depth attachment, Compute read from sampled image", + THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, + THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to depth attachment, Graphics fragment read from input attachment", + THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_DEPTH_STENCIL_INPUT_ATTACHMENT, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to depth attachment, Graphics fragment read from sampled image", + THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to color attachment, Graphics fragment read from input attachment", + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_COLOR_INPUT_ATTACHMENT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to color attachment, Graphics fragment read from sampled image", + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics write to color attachment, Graphics vertex read from sampled image", + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + THSVS_ACCESS_VERTEX_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics fragment read from sampled image, Graphics write to color attachment", + 
THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + 0, + 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + global_barrier_test("None, Transfer read from buffer", + THSVS_ACCESS_NONE, + THSVS_ACCESS_TRANSFER_READ, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0); + + global_barrier_test("Transfer write to buffer, Graphics read from vertex buffer", + THSVS_ACCESS_TRANSFER_WRITE, + THSVS_ACCESS_VERTEX_BUFFER, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT); + + image_barrier_test("Transfer write to image, Graphics fragment read from sampled image", + THSVS_ACCESS_TRANSFER_WRITE, + THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + image_barrier_test("Graphics color attachment write, Presentation", + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, + THSVS_ACCESS_PRESENT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + 0, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); + + global_barrier_test("Full pipeline barrier", + THSVS_ACCESS_GENERAL, + THSVS_ACCESS_GENERAL, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT); +} diff --git a/contrib/simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h b/contrib/simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h new file mode 100644 index 0000000..cc55a72 --- /dev/null +++ b/contrib/simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h @@ -0,0 +1,1397 @@ +// Copyright (c) 2017-2019 Tobias Hector + +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +//// Simpler Vulkan Synchronization //// +/* +In an effort to make Vulkan synchronization more accessible, I created this +stb-inspired single-header library in order to somewhat simplify the core +synchronization mechanisms in Vulkan - pipeline barriers and events. + +Rather than the complex maze of enums and bit flags in Vulkan - many +combinations of which are invalid or nonsensical - this library collapses +this to a much shorter list of 40 distinct usage types, and a couple of +options for handling image layouts. + +Use of other synchronization mechanisms such as semaphores, fences and render +passes are not addressed in this API at present. + +USAGE + + #define the symbol THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION in + *one* C/C++ file before the #include of this file; the implementation + will be generated in that file. + +VERSION + + alpha.9 + + Alpha.9 adds the thsvsGetAccessInfo function to translate access types into a thsvsVkAccessInfo. + + +VERSION HISTORY + + alpha.8 + + Alpha.8 adds a host preinitialization state for linear images, as well as a number of new access sets for extensions released since the last update. + + alpha.7 + + Alpha.7 incorporates a number of fixes from @gwihlidal, and fixes + handling of pipeline stages in the presence of multiple access types or + barriers in light of other recent changes. + + alpha.6 + + Alpha.6 fixes a typo (VK_ACCESS_TYPE_MEMORY_READ|WRITE_BIT should have been VK_ACCESS_MEMORY_READ|WRITE_BIT), and sets the pipeline stage src and dst flag bits to VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT and VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT during initialization, not 0 as per alpha.5 + + alpha.5 + + Alpha.5 now correctly zeroes out the pipeline stage flags before trying to incrementally set bits on them... common theme here, whoops. + + alpha.4 + + Alpha.4 now correctly zeroes out the access types before trying to incrementally set bits on them (!) + + alpha.3 + + Alpha.3 changes the following: + + Uniform and vertex buffer access in one enum, matching D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER: + - THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER_OR_VERTEX_BUFFER + + Color read *and* write access, matching D3D12_RESOURCE_STATE_RENDER_TARGET: + - THSVS_ACCESS_COLOR_ATTACHMENT_READ_WRITE + + Also the "THSVS_ACCESS_*_SHADER_READ_SAMPLED_IMAGE" enums have been renamed to the form "THSVS_ACCESS_*_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER" + + alpha.2 + + Alpha.2 adds four new resource states for "ANY SHADER ACCESS": + - THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER + - THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE + - THSVS_ACCESS_ANY_SHADER_READ_OTHER + - THSVS_ACCESS_ANY_SHADER_WRITE + + alpha.1 + + Alpha.1 adds three new resource states: + - THSVS_ACCESS_GENERAL (Any access on the device) + - THSVS_ACCESS_DEPTH_ATTACHMENT_WRITE_STENCIL_READ_ONLY (Write access to only the depth aspect of a depth/stencil attachment) + - THSVS_ACCESS_STENCIL_ATTACHMENT_WRITE_DEPTH_READ_ONLY (Write access to only the stencil aspect of a depth/stencil attachment) + + It also fixes a couple of typos, and adds clarification as to when extensions need to be enabled to use a feature. + + alpha.0 + + This is the very first public release of this library; future revisions + of this API may change the API in an incompatible manner as feedback is + received. + Once the version becomes stable, incompatible changes will only be made + to major revisions of the API - minor revisions will only contain + bug fixes or minor additions. 
+ +MEMORY ALLOCATION + + The thsvsCmdPipelineBarrier and thWaitEvents commands allocate temporary + storage for the Vulkan barrier equivalents in order to pass them to the + respective Vulkan commands. + + These use the `THSVS_TEMP_ALLOC(size)` and `THSVS_TEMP_FREE(x)` macros, + which are by default set to alloca(size) and ((void)(x)), respectively. + If you don't want to use stack space or would rather use your own + allocation strategy, these can be overridden by defining these macros + in before #include-ing the header file with + THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION defined. + + I'd rather avoid the need for these allocations in what are likely to be + high-traffic commands, but currently just want to ship something - may + revisit this at a future date based on feedback. + +EXPRESSIVENESS COMPARED TO RAW VULKAN + + Despite the fact that this API is fairly simple, it expresses 99% of + what you'd actually ever want to do in practice. + Adding the missing expressiveness would result in increased complexity + which didn't seem worth the trade off - however I would consider adding + something for them in future if it becomes an issue. + + Here's a list of known things you can't express: + + * Execution only dependencies cannot be expressed. + These are occasionally useful in conjunction with semaphores, or when + trying to be clever with scheduling - but their usage is both limited + and fairly tricky to get right anyway. + * Depth/Stencil Input Attachments can be read in a shader using either + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL or + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL - this library + *always* uses VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL. + It's possible (though highly unlikely) when aliasing images that this + results in unnecessary transitions. + +ERROR CHECKS + + By default, as with the Vulkan API, this library does NOT check for + errors. + However, a number of optional error checks (THSVS_ERROR_CHECK_*) can be + enabled by uncommenting the relevant #defines. + Currently, error checks simply assert at the point a failure is detected + and do not output an error message. + I certainly do not claim they capture *all* possible errors, but they + capture what should be some of the more common ones. + Use of the Vulkan Validation Layers in tandem with this library is + strongly recommended: + https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers + +ISSUES + + This header was clean of warnings using -Wall as of time of publishing + on both gcc 4.8.4 and clang 3.5, using the c99 standard. + + There's a potential pitfall in thsvsCmdPipelineBarrier and thsvsCmdWaitEvents + where alloca is used for temporary allocations. See MEMORY ALLOCATION + for more information. + + Testing of this library is so far extremely limited with no immediate + plans to add to that - so there's bound to be some amount of bugs. + Please raise these issues on the repo issue tracker, or provide a fix + via a pull request yourself if you're so inclined. +*/ + +#ifndef THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_H +#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_H 1 + +#include + +/* +ThsvsAccessType defines all potential resource usages in the Vulkan API. +*/ +typedef enum ThsvsAccessType { + THSVS_ACCESS_NONE, // No access. 
Useful primarily for initialization + +// Read access + // Requires VK_NV_device_generated_commands to be enabled + THSVS_ACCESS_COMMAND_BUFFER_READ_NV, // Command buffer read operation as defined by NV_device_generated_commands + THSVS_ACCESS_INDIRECT_BUFFER, // Read as an indirect buffer for drawing or dispatch + THSVS_ACCESS_INDEX_BUFFER, // Read as an index buffer for drawing + THSVS_ACCESS_VERTEX_BUFFER, // Read as a vertex buffer for drawing + THSVS_ACCESS_VERTEX_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a vertex shader + THSVS_ACCESS_VERTEX_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, // Read as a sampled image/uniform texel buffer in a vertex shader + THSVS_ACCESS_VERTEX_SHADER_READ_OTHER, // Read as any other resource in a vertex shader + THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a tessellation control shader + THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, // Read as a sampled image/uniform texel buffer in a tessellation control shader + THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_OTHER, // Read as any other resource in a tessellation control shader + THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a tessellation evaluation shader + THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, // Read as a sampled image/uniform texel buffer in a tessellation evaluation shader + THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_OTHER, // Read as any other resource in a tessellation evaluation shader + THSVS_ACCESS_GEOMETRY_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a geometry shader + THSVS_ACCESS_GEOMETRY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,// Read as a sampled image/uniform texel buffer in a geometry shader + THSVS_ACCESS_GEOMETRY_SHADER_READ_OTHER, // Read as any other resource in a geometry shader + THSVS_ACCESS_TASK_SHADER_READ_UNIFORM_BUFFER_NV, // Read as a uniform buffer in a task shader + THSVS_ACCESS_TASK_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER_NV, // Read as a sampled image/uniform texel buffer in a task shader + THSVS_ACCESS_TASK_SHADER_READ_OTHER_NV, // Read as any other resource in a task shader + THSVS_ACCESS_MESH_SHADER_READ_UNIFORM_BUFFER_NV, // Read as a uniform buffer in a mesh shader + THSVS_ACCESS_MESH_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER_NV, // Read as a sampled image/uniform texel buffer in a mesh shader + THSVS_ACCESS_MESH_SHADER_READ_OTHER_NV, // Read as any other resource in a mesh shader + THSVS_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_EXT, // Read as a transform feedback counter buffer + THSVS_ACCESS_FRAGMENT_DENSITY_MAP_READ_EXT, // Read as a fragment density map image + THSVS_ACCESS_SHADING_RATE_READ_NV, // Read as a shading rate image + THSVS_ACCESS_FRAGMENT_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a fragment shader + THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,// Read as a sampled image/uniform texel buffer in a fragment shader + THSVS_ACCESS_FRAGMENT_SHADER_READ_COLOR_INPUT_ATTACHMENT, // Read as an input attachment with a color format in a fragment shader + THSVS_ACCESS_FRAGMENT_SHADER_READ_DEPTH_STENCIL_INPUT_ATTACHMENT, // Read as an input attachment with a depth/stencil format in a fragment shader + THSVS_ACCESS_FRAGMENT_SHADER_READ_OTHER, // Read as any other resource in a fragment shader + THSVS_ACCESS_COLOR_ATTACHMENT_READ, // Read by standard blending/logic 
operations or subpass load operations + THSVS_ACCESS_COLOR_ATTACHMENT_ADVANCED_BLENDING_EXT, // Read by advanced blending, standard blending, logic operations, or subpass load operations + THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ, // Read by depth/stencil tests or subpass load operations + THSVS_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in a compute shader + THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, // Read as a sampled image/uniform texel buffer in a compute shader + THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, // Read as any other resource in a compute shader + THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER, // Read as a uniform buffer in any shader + THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER_OR_VERTEX_BUFFER, // Read as a uniform buffer in any shader, or a vertex buffer + THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, // Read as a sampled image in any shader + THSVS_ACCESS_ANY_SHADER_READ_OTHER, // Read as any other resource (excluding attachments) in any shader + THSVS_ACCESS_TRANSFER_READ, // Read as the source of a transfer operation + THSVS_ACCESS_HOST_READ, // Read on the host + + // Requires VK_KHR_swapchain to be enabled + THSVS_ACCESS_PRESENT, // Read by the presentation engine (i.e. vkQueuePresentKHR) + + // Requires VK_EXT_conditional_rendering to be enabled + THSVS_ACCESS_CONDITIONAL_RENDERING_READ_EXT, // Read by conditional rendering + + // Requires VK_NV_ray_tracing to be enabled + THSVS_ACCESS_RAY_TRACING_SHADER_ACCELERATION_STRUCTURE_READ_NV, // Read by a ray tracing shader as an acceleration structure + THSVS_ACCESS_ACCELERATION_STRUCTURE_BUILD_READ_NV, // Read as an acceleration structure during a build + + // Read accesses end + THSVS_END_OF_READ_ACCESS, + +// Write access + // Requires VK_NV_device_generated_commands to be enabled + THSVS_ACCESS_COMMAND_BUFFER_WRITE_NV, // Command buffer write operation + THSVS_ACCESS_VERTEX_SHADER_WRITE, // Written as any resource in a vertex shader + THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_WRITE, // Written as any resource in a tessellation control shader + THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_WRITE, // Written as any resource in a tessellation evaluation shader + THSVS_ACCESS_GEOMETRY_SHADER_WRITE, // Written as any resource in a geometry shader + + // Requires VK_NV_mesh_shading to be enabled + THSVS_ACCESS_TASK_SHADER_WRITE_NV, // Written as any resource in a task shader + THSVS_ACCESS_MESH_SHADER_WRITE_NV, // Written as any resource in a mesh shader + + // Requires VK_EXT_transform_feedback to be enabled + THSVS_ACCESS_TRANSFORM_FEEDBACK_WRITE_EXT, // Written as a transform feedback buffer + THSVS_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_EXT, // Written as a transform feedback counter buffer + + THSVS_ACCESS_FRAGMENT_SHADER_WRITE, // Written as any resource in a fragment shader + THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, // Written as a color attachment during rendering, or via a subpass store op + THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, // Written as a depth/stencil attachment during rendering, or via a subpass store op + + // Requires VK_KHR_maintenance2 to be enabled + THSVS_ACCESS_DEPTH_ATTACHMENT_WRITE_STENCIL_READ_ONLY, // Written as a depth aspect of a depth/stencil attachment during rendering, whilst the stencil aspect is read-only + THSVS_ACCESS_STENCIL_ATTACHMENT_WRITE_DEPTH_READ_ONLY, // Written as a stencil aspect of a depth/stencil attachment during rendering, whilst the depth aspect is read-only + + 
THSVS_ACCESS_COMPUTE_SHADER_WRITE, // Written as any resource in a compute shader + THSVS_ACCESS_ANY_SHADER_WRITE, // Written as any resource in any shader + THSVS_ACCESS_TRANSFER_WRITE, // Written as the destination of a transfer operation + THSVS_ACCESS_HOST_PREINITIALIZED, // Data pre-filled by host before device access starts + THSVS_ACCESS_HOST_WRITE, // Written on the host + + // Requires VK_NV_ray_tracing to be enabled + THSVS_ACCESS_ACCELERATION_STRUCTURE_BUILD_WRITE_NV, // Written as an acceleration structure during a build + + THSVS_ACCESS_COLOR_ATTACHMENT_READ_WRITE, // Read or written as a color attachment during rendering +// General access + THSVS_ACCESS_GENERAL, // Covers any access - useful for debug, generally avoid for performance reasons + +// Number of access types + THSVS_NUM_ACCESS_TYPES +} ThsvsAccessType; + +/* +ThsvsImageLayout defines a handful of layout options for images. +Rather than a list of all possible image layouts, this reduced list is +correlated with the access types to map to the correct Vulkan layouts. +THSVS_IMAGE_LAYOUT_OPTIMAL is usually preferred. +*/ +typedef enum ThsvsImageLayout { + THSVS_IMAGE_LAYOUT_OPTIMAL, // Choose the most optimal layout for each usage. Performs layout transitions as appropriate for the access. + THSVS_IMAGE_LAYOUT_GENERAL, // Layout accessible by all Vulkan access types on a device - no layout transitions except for presentation + + // Requires VK_KHR_shared_presentable_image to be enabled. Can only be used for shared presentable images (i.e. single-buffered swap chains). + THSVS_IMAGE_LAYOUT_GENERAL_AND_PRESENTATION // As GENERAL, but also allows presentation engines to access it - no layout transitions +} ThsvsImageLayout; + +/* +Global barriers define a set of accesses on multiple resources at once. +If a buffer or image doesn't require a queue ownership transfer, or an image +doesn't require a layout transition (e.g. you're using one of the GENERAL +layouts) then a global barrier should be preferred. +Simply define the previous and next access types of resources affected. +*/ +typedef struct ThsvsGlobalBarrier { + uint32_t prevAccessCount; + const ThsvsAccessType* pPrevAccesses; + uint32_t nextAccessCount; + const ThsvsAccessType* pNextAccesses; +} ThsvsGlobalBarrier; + +/* +Buffer barriers should only be used when a queue family ownership transfer +is required - prefer global barriers at all other times. + +Access types are defined in the same way as for a global memory barrier, but +they only affect the buffer range identified by buffer, offset and size, +rather than all resources. +srcQueueFamilyIndex and dstQueueFamilyIndex will be passed unmodified into a +VkBufferMemoryBarrier. + +A buffer barrier defining a queue ownership transfer needs to be executed +twice - once by a queue in the source queue family, and then once again by a +queue in the destination queue family, with a semaphore guaranteeing +execution order between them. +*/ +typedef struct ThsvsBufferBarrier { + uint32_t prevAccessCount; + const ThsvsAccessType* pPrevAccesses; + uint32_t nextAccessCount; + const ThsvsAccessType* pNextAccesses; + uint32_t srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize size; +} ThsvsBufferBarrier; + +/* +Image barriers should only be used when a queue family ownership transfer +or an image layout transition is required - prefer global barriers at all +other times. 
+In general it is better to use image barriers with THSVS_IMAGE_LAYOUT_OPTIMAL +than it is to use global barriers with images using either of the +THSVS_IMAGE_LAYOUT_GENERAL* layouts. + +Access types are defined in the same way as for a global memory barrier, but +they only affect the image subresource range identified by image and +subresourceRange, rather than all resources. +srcQueueFamilyIndex, dstQueueFamilyIndex, image, and subresourceRange will +be passed unmodified into a VkImageMemoryBarrier. + +An image barrier defining a queue ownership transfer needs to be executed +twice - once by a queue in the source queue family, and then once again by a +queue in the destination queue family, with a semaphore guaranteeing +execution order between them. + +If discardContents is set to true, the contents of the image become +undefined after the barrier is executed, which can result in a performance +boost over attempting to preserve the contents. +This is particularly useful for transient images where the contents are +going to be immediately overwritten. A good example of when to use this is +when an application re-uses a presented image after vkAcquireNextImageKHR. +*/ +typedef struct ThsvsImageBarrier { + uint32_t prevAccessCount; + const ThsvsAccessType* pPrevAccesses; + uint32_t nextAccessCount; + const ThsvsAccessType* pNextAccesses; + ThsvsImageLayout prevLayout; + ThsvsImageLayout nextLayout; + VkBool32 discardContents; + uint32_t srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkImage image; + VkImageSubresourceRange subresourceRange; +} ThsvsImageBarrier; + +/* +Mapping function that translates a set of accesses into the corresponding +pipeline stages, VkAccessFlags, and image layout. +*/ +void thsvsGetAccessInfo( + uint32_t accessCount, + const ThsvsAccessType* pAccesses, + VkPipelineStageFlags* pStageMask, + VkAccessFlags* pAccessMask, + VkImageLayout* pImageLayout, + bool* pHasWriteAccess); + +#ifdef __cplusplus +/* +Mapping function that translates a global barrier into a set of source and +destination pipeline stages, and a VkMemoryBarrier, that can be used with +Vulkan's synchronization methods. +*/ +void thsvsGetVulkanMemoryBarrier( + const ThsvsGlobalBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkMemoryBarrier* pVkBarrier); + +/* +Mapping function that translates a buffer barrier into a set of source and +destination pipeline stages, and a VkBufferMemoryBarrier, that can be used +with Vulkan's synchronization methods. +*/ +void thsvsGetVulkanBufferMemoryBarrier( + const ThsvsBufferBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkBufferMemoryBarrier* pVkBarrier); + +/* +Mapping function that translates an image barrier into a set of source and +destination pipeline stages, and a VkBufferMemoryBarrier, that can be used +with Vulkan's synchronization methods. +*/ +void thsvsGetVulkanImageMemoryBarrier( + const ThsvsImageBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkImageMemoryBarrier* pVkBarrier); +#endif + +/* +Simplified wrapper around vkCmdPipelineBarrier. + +The mapping functions defined above are used to translate the passed in +barrier definitions into a set of pipeline stages and native Vulkan memory +barriers to be passed to vkCmdPipelineBarrier. + +commandBuffer is passed unmodified to vkCmdPipelineBarrier. 
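+
+For example, a global barrier between a compute write and a subsequent compute
+read might look like this (a sketch; commandBuffer is the VkCommandBuffer being
+recorded):
+
+    ThsvsAccessType prev = THSVS_ACCESS_COMPUTE_SHADER_WRITE;
+    ThsvsAccessType next = THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER;
+    ThsvsGlobalBarrier global = {1, &prev, 1, &next};
+
+    thsvsCmdPipelineBarrier(commandBuffer, &global, 0, NULL, 0, NULL);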
+*/ +void thsvsCmdPipelineBarrier( + VkCommandBuffer commandBuffer, + const ThsvsGlobalBarrier* pGlobalBarrier, + uint32_t bufferBarrierCount, + const ThsvsBufferBarrier* pBufferBarriers, + uint32_t imageBarrierCount, + const ThsvsImageBarrier* pImageBarriers); + +/* +Wrapper around vkCmdSetEvent. + +Sets an event when the accesses defined by pPrevAccesses are completed. + +commandBuffer and event are passed unmodified to vkCmdSetEvent. +*/ +void thsvsCmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + uint32_t prevAccessCount, + const ThsvsAccessType* pPrevAccesses); + +/* +Wrapper around vkCmdResetEvent. + +Resets an event when the accesses defined by pPrevAccesses are completed. + +commandBuffer and event are passed unmodified to vkCmdResetEvent. +*/ +void thsvsCmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + uint32_t prevAccessCount, + const ThsvsAccessType* pPrevAccesses); + +/* +Simplified wrapper around vkCmdWaitEvents. + +The mapping functions defined above are used to translate the passed in +barrier definitions into a set of pipeline stages and native Vulkan memory +barriers to be passed to vkCmdPipelineBarrier. + +commandBuffer, eventCount, and pEvents are passed unmodified to +vkCmdWaitEvents. +*/ +void thsvsCmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + const ThsvsGlobalBarrier* pGlobalBarrier, + uint32_t bufferBarrierCount, + const ThsvsBufferBarrier* pBufferBarriers, + uint32_t imageBarrierCount, + const ThsvsImageBarrier* pImageBarriers); + +#endif // THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_H + +#ifdef THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION + +#include + +//// Optional Error Checking //// +/* +Checks for barriers defining multiple usages that have different layouts +*/ +// #define THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT + +/* +Checks if an image/buffer barrier is used when a global barrier would suffice +*/ +// #define THSVS_ERROR_CHECK_COULD_USE_GLOBAL_BARRIER + +/* +Checks if a write access is listed alongside any other access - if so it +points to a potential data hazard that you need to synchronize separately. +In some cases it may simply be over-synchronization however, but it's usually +worth checking. +*/ +// #define THSVS_ERROR_CHECK_POTENTIAL_HAZARD + +/* +Checks if a variety of table lookups (like the access map) are within +a valid range. +*/ +// #define THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + +//// Temporary Memory Allocation //// +/* +Override these if you can't afford the stack space or just want to use a +custom temporary allocator. +These are currently used exclusively to allocate Vulkan memory barriers in +the API, one for each Buffer or Image barrier passed into the pipeline and +event functions. +May consider other allocation strategies in future. 
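+
+For example (a sketch; my_frame_alloc and my_frame_free stand in for whatever
+allocator the application provides):
+
+    #define THSVS_TEMP_ALLOC(size) (my_frame_alloc(size))
+    #define THSVS_TEMP_FREE(x)     (my_frame_free(x))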
+*/ + +// Alloca inclusion code below copied from +// https://github.com/nothings/stb/blob/master/stb_vorbis.c + +// find definition of alloca if it's not in stdlib.h: +#if defined(_MSC_VER) || defined(__MINGW32__) + #include +#endif +#if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__) + #include +#endif + +#if defined(THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE) || \ + defined(THSVS_ERROR_CHECK_COULD_USE_GLOBAL_BARRIER) || \ + defined(THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT) || \ + defined(THSVS_ERROR_CHECK_POTENTIAL_HAZARD) + #include +#endif + +#if !defined(THSVS_TEMP_ALLOC) +#define THSVS_TEMP_ALLOC(size) (alloca(size)) +#endif + +#if !defined(THSVS_TEMP_FREE) +#define THSVS_TEMP_FREE(x) ((void)(x)) +#endif + +typedef struct ThsvsVkAccessInfo { + VkPipelineStageFlags stageMask; + VkAccessFlags accessMask; + VkImageLayout imageLayout; +} ThsvsVkAccessInfo; + +const ThsvsVkAccessInfo ThsvsAccessMap[THSVS_NUM_ACCESS_TYPES] = { + // THSVS_ACCESS_NONE + { 0, + 0, + VK_IMAGE_LAYOUT_UNDEFINED}, + +// Read Access + // THSVS_ACCESS_COMMAND_BUFFER_READ_NV + { VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV, + VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_INDIRECT_BUFFER + { VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + + // THSVS_ACCESS_INDEX_BUFFER + { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_ACCESS_INDEX_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_VERTEX_BUFFER + { VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_VERTEX_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_VERTEX_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_VERTEX_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_GEOMETRY_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_GEOMETRY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, + 
VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_GEOMETRY_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TASK_SHADER_READ_UNIFORM_BUFFER_NV + { VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_TASK_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER_NV + { VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_TASK_SHADER_READ_OTHER_NV + { VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_MESH_SHADER_READ_UNIFORM_BUFFER_NV + { VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_MESH_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER_NV + { VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_MESH_SHADER_READ_OTHER_NV + { VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_EXT + { VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_FRAGMENT_DENSITY_MAP_READ_EXT + { VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT, + VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT, + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT}, + // THSVS_ACCESS_SHADING_RATE_READ_NV + { VK_PIPELINE_STAGE_SHADING_RATE_IMAGE_BIT_NV, + VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV, + VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV}, + + // THSVS_ACCESS_FRAGMENT_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_FRAGMENT_SHADER_READ_COLOR_INPUT_ATTACHMENT + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_FRAGMENT_SHADER_READ_DEPTH_STENCIL_INPUT_ATTACHMENT + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_FRAGMENT_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_COLOR_ATTACHMENT_READ + { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, + // THSVS_ACCESS_COLOR_ATTACHMENT_ADVANCED_BLENDING_EXT + { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, + // THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ + { VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL}, + + // THSVS_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER + { 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER_OR_VERTEX_BUFFER + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + // THSVS_ACCESS_ANY_SHADER_READ_OTHER + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TRANSFER_READ + { VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL}, + // THSVS_ACCESS_HOST_READ + { VK_PIPELINE_STAGE_HOST_BIT, + VK_ACCESS_HOST_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_PRESENT + { 0, + 0, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}, + // THSVS_ACCESS_CONDITIONAL_RENDERING_READ_EXT + { VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, + VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, + VK_IMAGE_LAYOUT_UNDEFINED}, + + // THSVS_ACCESS_RAY_TRACING_SHADER_ACCELERATION_STRUCTURE_READ_NV + { VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_ACCELERATION_STRUCTURE_BUILD_READ_NV + { VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_END_OF_READ_ACCESS + { 0, + 0, + VK_IMAGE_LAYOUT_UNDEFINED}, + +// Write access + // THSVS_ACCESS_COMMAND_BUFFER_WRITE_NV + { VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV, + VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_VERTEX_SHADER_WRITE + { VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_TESSELLATION_CONTROL_SHADER_WRITE + { VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_TESSELLATION_EVALUATION_SHADER_WRITE + { VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_GEOMETRY_SHADER_WRITE + { VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_TASK_SHADER_WRITE_NV + { VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_MESH_SHADER_WRITE_NV + { VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_TRANSFORM_FEEDBACK_WRITE_EXT + { VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_EXT + { VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, + VK_IMAGE_LAYOUT_UNDEFINED}, + // THSVS_ACCESS_FRAGMENT_SHADER_WRITE + { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_COLOR_ATTACHMENT_WRITE + { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + 
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, + // THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE + { VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}, + // THSVS_ACCESS_DEPTH_ATTACHMENT_WRITE_STENCIL_READ_ONLY + { VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR}, + // THSVS_ACCESS_STENCIL_ATTACHMENT_WRITE_DEPTH_READ_ONLY + { VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR}, + + // THSVS_ACCESS_COMPUTE_SHADER_WRITE + { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_ANY_SHADER_WRITE + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + + // THSVS_ACCESS_TRANSFER_WRITE + { VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL}, + // THSVS_ACCESS_HOST_PREINITIALIZED + { VK_PIPELINE_STAGE_HOST_BIT, + VK_ACCESS_HOST_WRITE_BIT, + VK_IMAGE_LAYOUT_PREINITIALIZED}, + // THSVS_ACCESS_HOST_WRITE + { VK_PIPELINE_STAGE_HOST_BIT, + VK_ACCESS_HOST_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL}, + // THSVS_ACCESS_ACCELERATION_STRUCTURE_BUILD_WRITE_NV + { VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV, + VK_IMAGE_LAYOUT_UNDEFINED}, + + // THSVS_ACCESS_COLOR_ATTACHMENT_READ_WRITE + { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, + // THSVS_ACCESS_GENERAL + { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL} +}; + +void thsvsGetAccessInfo( + uint32_t accessCount, + const ThsvsAccessType* pAccesses, + VkPipelineStageFlags* pStageMask, + VkAccessFlags* pAccessMask, + VkImageLayout* pImageLayout, + bool* pHasWriteAccess) +{ + *pStageMask = 0; + *pAccessMask = 0; + *pImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + *pHasWriteAccess = false; + + for (uint32_t i = 0; i < accessCount; ++i) + { + ThsvsAccessType access = pAccesses[i]; + const ThsvsVkAccessInfo* pAccessInfo = &ThsvsAccessMap[access]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(access < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. 
+ assert(access < THSVS_END_OF_READ_ACCESS || accessCount == 1); +#endif + + *pStageMask |= pAccessInfo->stageMask; + + if (access > THSVS_END_OF_READ_ACCESS) + *pHasWriteAccess = true; + + *pAccessMask |= pAccessInfo->accessMask; + + VkImageLayout layout = pAccessInfo->imageLayout; + +#ifdef THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT + assert(*pImageLayout == VK_IMAGE_LAYOUT_UNDEFINED || + *pImageLayout == layout); +#endif + + *pImageLayout = layout; + } +} + +void thsvsGetVulkanMemoryBarrier( + const ThsvsGlobalBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkMemoryBarrier* pVkBarrier) +{ + *pSrcStages = 0; + *pDstStages = 0; + pVkBarrier->sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + pVkBarrier->pNext = NULL; + pVkBarrier->srcAccessMask = 0; + pVkBarrier->dstAccessMask = 0; + + for (uint32_t i = 0; i < thBarrier.prevAccessCount; ++i) + { + ThsvsAccessType prevAccess = thBarrier.pPrevAccesses[i]; + const ThsvsVkAccessInfo* pPrevAccessInfo = &ThsvsAccessMap[prevAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(prevAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(prevAccess < THSVS_END_OF_READ_ACCESS || thBarrier.prevAccessCount == 1); +#endif + + *pSrcStages |= pPrevAccessInfo->stageMask; + + // Add appropriate availability operations - for writes only. + if (prevAccess > THSVS_END_OF_READ_ACCESS) + pVkBarrier->srcAccessMask |= pPrevAccessInfo->accessMask; + } + + for (uint32_t i = 0; i < thBarrier.nextAccessCount; ++i) + { + ThsvsAccessType nextAccess = thBarrier.pNextAccesses[i]; + const ThsvsVkAccessInfo* pNextAccessInfo = &ThsvsAccessMap[nextAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the next access index is a valid range for the lookup + assert(nextAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(nextAccess < THSVS_END_OF_READ_ACCESS || thBarrier.nextAccessCount == 1); +#endif + *pDstStages |= pNextAccessInfo->stageMask; + + // Add visibility operations as necessary. + // If the src access mask is zero, this is a WAR hazard (or for some reason a "RAR"), + // so the dst access mask can be safely zeroed as these don't need visibility. 
+ if (pVkBarrier->srcAccessMask != 0) + pVkBarrier->dstAccessMask |= pNextAccessInfo->accessMask; + } + + // Ensure that the stage masks are valid if no stages were determined + if (*pSrcStages == 0) + *pSrcStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + if (*pDstStages == 0) + *pDstStages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; +} + +void thsvsGetVulkanBufferMemoryBarrier( + const ThsvsBufferBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkBufferMemoryBarrier* pVkBarrier) +{ + *pSrcStages = 0; + *pDstStages = 0; + pVkBarrier->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + pVkBarrier->pNext = NULL; + pVkBarrier->srcAccessMask = 0; + pVkBarrier->dstAccessMask = 0; + pVkBarrier->srcQueueFamilyIndex = thBarrier.srcQueueFamilyIndex; + pVkBarrier->dstQueueFamilyIndex = thBarrier.dstQueueFamilyIndex; + pVkBarrier->buffer = thBarrier.buffer; + pVkBarrier->offset = thBarrier.offset; + pVkBarrier->size = thBarrier.size; + +#ifdef THSVS_ERROR_CHECK_COULD_USE_GLOBAL_BARRIER + assert(pVkBarrier->srcQueueFamilyIndex != pVkBarrier->dstQueueFamilyIndex); +#endif + + for (uint32_t i = 0; i < thBarrier.prevAccessCount; ++i) + { + ThsvsAccessType prevAccess = thBarrier.pPrevAccesses[i]; + const ThsvsVkAccessInfo* pPrevAccessInfo = &ThsvsAccessMap[prevAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(prevAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(prevAccess < THSVS_END_OF_READ_ACCESS || thBarrier.prevAccessCount == 1); +#endif + + *pSrcStages |= pPrevAccessInfo->stageMask; + + // Add appropriate availability operations - for writes only. + if (prevAccess > THSVS_END_OF_READ_ACCESS) + pVkBarrier->srcAccessMask |= pPrevAccessInfo->accessMask; + } + + for (uint32_t i = 0; i < thBarrier.nextAccessCount; ++i) + { + ThsvsAccessType nextAccess = thBarrier.pNextAccesses[i]; + const ThsvsVkAccessInfo* pNextAccessInfo = &ThsvsAccessMap[nextAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the next access index is a valid range for the lookup + assert(nextAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(nextAccess < THSVS_END_OF_READ_ACCESS || thBarrier.nextAccessCount == 1); +#endif + + *pDstStages |= pNextAccessInfo->stageMask; + + // Add visibility operations as necessary. + // If the src access mask is zero, this is a WAR hazard (or for some reason a "RAR"), + // so the dst access mask can be safely zeroed as these don't need visibility. 
+ if (pVkBarrier->srcAccessMask != 0) + pVkBarrier->dstAccessMask |= pNextAccessInfo->accessMask; + } + + // Ensure that the stage masks are valid if no stages were determined + if (*pSrcStages == 0) + *pSrcStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + if (*pDstStages == 0) + *pDstStages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; +} + +void thsvsGetVulkanImageMemoryBarrier( + const ThsvsImageBarrier& thBarrier, + VkPipelineStageFlags* pSrcStages, + VkPipelineStageFlags* pDstStages, + VkImageMemoryBarrier* pVkBarrier) +{ + *pSrcStages = 0; + *pDstStages = 0; + pVkBarrier->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + pVkBarrier->pNext = NULL; + pVkBarrier->srcAccessMask = 0; + pVkBarrier->dstAccessMask = 0; + pVkBarrier->srcQueueFamilyIndex = thBarrier.srcQueueFamilyIndex; + pVkBarrier->dstQueueFamilyIndex = thBarrier.dstQueueFamilyIndex; + pVkBarrier->image = thBarrier.image; + pVkBarrier->subresourceRange = thBarrier.subresourceRange; + pVkBarrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + pVkBarrier->newLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + for (uint32_t i = 0; i < thBarrier.prevAccessCount; ++i) + { + ThsvsAccessType prevAccess = thBarrier.pPrevAccesses[i]; + const ThsvsVkAccessInfo* pPrevAccessInfo = &ThsvsAccessMap[prevAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(prevAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(prevAccess < THSVS_END_OF_READ_ACCESS || thBarrier.prevAccessCount == 1); +#endif + + *pSrcStages |= pPrevAccessInfo->stageMask; + + // Add appropriate availability operations - for writes only. + if (prevAccess > THSVS_END_OF_READ_ACCESS) + pVkBarrier->srcAccessMask |= pPrevAccessInfo->accessMask; + + if (thBarrier.discardContents == VK_TRUE) + { + pVkBarrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } + else + { + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; + + switch(thBarrier.prevLayout) + { + case THSVS_IMAGE_LAYOUT_GENERAL: + if (prevAccess == THSVS_ACCESS_PRESENT) + layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + else + layout = VK_IMAGE_LAYOUT_GENERAL; + break; + case THSVS_IMAGE_LAYOUT_OPTIMAL: + layout = pPrevAccessInfo->imageLayout; + break; + case THSVS_IMAGE_LAYOUT_GENERAL_AND_PRESENTATION: + layout = VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR; + break; + } + + +#ifdef THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT + assert(pVkBarrier->oldLayout == VK_IMAGE_LAYOUT_UNDEFINED || + pVkBarrier->oldLayout == layout); +#endif + pVkBarrier->oldLayout = layout; + } + } + + for (uint32_t i = 0; i < thBarrier.nextAccessCount; ++i) + { + ThsvsAccessType nextAccess = thBarrier.pNextAccesses[i]; + const ThsvsVkAccessInfo* pNextAccessInfo = &ThsvsAccessMap[nextAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the next access index is a valid range for the lookup + assert(nextAccess < THSVS_NUM_ACCESS_TYPES); +#endif + +#ifdef THSVS_ERROR_CHECK_POTENTIAL_HAZARD + // Asserts that the access is a read, else it's a write and it should appear on its own. + assert(nextAccess < THSVS_END_OF_READ_ACCESS || thBarrier.nextAccessCount == 1); +#endif + + *pDstStages |= pNextAccessInfo->stageMask; + + // Add visibility operations as necessary. + // If the src access mask is zero, this is a WAR hazard (or for some reason a "RAR"), + // so the dst access mask can be safely zeroed as these don't need visibility. 
+ if (pVkBarrier->srcAccessMask != 0) + pVkBarrier->dstAccessMask |= pNextAccessInfo->accessMask; + + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; + switch(thBarrier.nextLayout) + { + case THSVS_IMAGE_LAYOUT_GENERAL: + if (nextAccess == THSVS_ACCESS_PRESENT) + layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + else + layout = VK_IMAGE_LAYOUT_GENERAL; + break; + case THSVS_IMAGE_LAYOUT_OPTIMAL: + layout = pNextAccessInfo->imageLayout; + break; + case THSVS_IMAGE_LAYOUT_GENERAL_AND_PRESENTATION: + layout = VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR; + break; + } + +#ifdef THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT + assert(pVkBarrier->newLayout == VK_IMAGE_LAYOUT_UNDEFINED || + pVkBarrier->newLayout == layout); +#endif + pVkBarrier->newLayout = layout; + } + +#ifdef THSVS_ERROR_CHECK_COULD_USE_GLOBAL_BARRIER + assert(pVkBarrier->newLayout != pVkBarrier->oldLayout || + pVkBarrier->srcQueueFamilyIndex != pVkBarrier->dstQueueFamilyIndex); +#endif + + // Ensure that the stage masks are valid if no stages were determined + if (*pSrcStages == 0) + *pSrcStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + if (*pDstStages == 0) + *pDstStages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; +} + +void thsvsCmdPipelineBarrier( + VkCommandBuffer commandBuffer, + const ThsvsGlobalBarrier* pGlobalBarrier, + uint32_t bufferBarrierCount, + const ThsvsBufferBarrier* pBufferBarriers, + uint32_t imageBarrierCount, + const ThsvsImageBarrier* pImageBarriers) +{ + VkMemoryBarrier memoryBarrier; + // Vulkan pipeline barrier command parameters + // commandBuffer; + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + uint32_t memoryBarrierCount = (pGlobalBarrier != NULL) ? 1 : 0; + VkMemoryBarrier* pMemoryBarriers = (pGlobalBarrier != NULL) ? 
&memoryBarrier : NULL; + uint32_t bufferMemoryBarrierCount = bufferBarrierCount; + VkBufferMemoryBarrier* pBufferMemoryBarriers = NULL; + uint32_t imageMemoryBarrierCount = imageBarrierCount; + VkImageMemoryBarrier* pImageMemoryBarriers = NULL; + + // Global memory barrier + if (pGlobalBarrier != NULL) + { + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + thsvsGetVulkanMemoryBarrier(*pGlobalBarrier, &tempSrcStageMask, &tempDstStageMask, pMemoryBarriers); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + + // Buffer memory barriers + if (bufferBarrierCount > 0) + { + pBufferMemoryBarriers = (VkBufferMemoryBarrier*)THSVS_TEMP_ALLOC(sizeof(VkBufferMemoryBarrier) * bufferMemoryBarrierCount); + + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + for (uint32_t i = 0; i < bufferBarrierCount; ++i) + { + thsvsGetVulkanBufferMemoryBarrier(pBufferBarriers[i], &tempSrcStageMask, &tempDstStageMask, &pBufferMemoryBarriers[i]); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + } + + // Image memory barriers + if (imageBarrierCount > 0) + { + pImageMemoryBarriers = (VkImageMemoryBarrier*)THSVS_TEMP_ALLOC(sizeof(VkImageMemoryBarrier) * imageMemoryBarrierCount); + + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + for (uint32_t i = 0; i < imageBarrierCount; ++i) + { + thsvsGetVulkanImageMemoryBarrier(pImageBarriers[i], &tempSrcStageMask, &tempDstStageMask, &pImageMemoryBarriers[i]); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + } + + vkCmdPipelineBarrier( + commandBuffer, + srcStageMask, + dstStageMask, + 0, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers); + + THSVS_TEMP_FREE(pBufferMemoryBarriers); + THSVS_TEMP_FREE(pImageMemoryBarriers); +} + +void thsvsCmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + uint32_t prevAccessCount, + const ThsvsAccessType* pPrevAccesses) +{ + VkPipelineStageFlags stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + for (uint32_t i = 0; i < prevAccessCount; ++i) + { + ThsvsAccessType prevAccess = pPrevAccesses[i]; + const ThsvsVkAccessInfo* pPrevAccessInfo = &ThsvsAccessMap[prevAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(prevAccess < THSVS_NUM_ACCESS_TYPES); +#endif + + stageMask |= pPrevAccessInfo->stageMask; + } + + vkCmdSetEvent( + commandBuffer, + event, + stageMask); +} + +void thsvsCmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + uint32_t prevAccessCount, + const ThsvsAccessType* pPrevAccesses) +{ + VkPipelineStageFlags stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + for (uint32_t i = 0; i < prevAccessCount; ++i) + { + ThsvsAccessType prevAccess = pPrevAccesses[i]; + const ThsvsVkAccessInfo* pPrevAccessInfo = &ThsvsAccessMap[prevAccess]; + +#ifdef THSVS_ERROR_CHECK_ACCESS_TYPE_IN_RANGE + // Asserts that the previous access index is a valid range for the lookup + assert(prevAccess < THSVS_NUM_ACCESS_TYPES); +#endif + + stageMask |= pPrevAccessInfo->stageMask; + } + + vkCmdResetEvent( + commandBuffer, + event, + stageMask); +} + +void thsvsCmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + const ThsvsGlobalBarrier* pGlobalBarrier, + uint32_t bufferBarrierCount, + const ThsvsBufferBarrier* 
pBufferBarriers, + uint32_t imageBarrierCount, + const ThsvsImageBarrier* pImageBarriers) +{ + VkMemoryBarrier memoryBarrier; + // Vulkan pipeline barrier command parameters + // commandBuffer; + // eventCount; + // pEvents; + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + uint32_t memoryBarrierCount = (pGlobalBarrier != NULL) ? 1 : 0; + VkMemoryBarrier* pMemoryBarriers = (pGlobalBarrier != NULL) ? &memoryBarrier : NULL; + uint32_t bufferMemoryBarrierCount = bufferBarrierCount; + VkBufferMemoryBarrier* pBufferMemoryBarriers = NULL; + uint32_t imageMemoryBarrierCount = imageBarrierCount; + VkImageMemoryBarrier* pImageMemoryBarriers = NULL; + + // Global memory barrier + if (pGlobalBarrier != NULL) + { + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + thsvsGetVulkanMemoryBarrier(*pGlobalBarrier, &tempSrcStageMask, &tempDstStageMask, pMemoryBarriers); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + + // Buffer memory barriers + if (bufferBarrierCount > 0) + { + pBufferMemoryBarriers = (VkBufferMemoryBarrier*)THSVS_TEMP_ALLOC(sizeof(VkBufferMemoryBarrier) * bufferMemoryBarrierCount); + + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + for (uint32_t i = 0; i < bufferBarrierCount; ++i) + { + thsvsGetVulkanBufferMemoryBarrier(pBufferBarriers[i], &tempSrcStageMask, &tempDstStageMask, &pBufferMemoryBarriers[i]); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + } + + // Image memory barriers + if (imageBarrierCount > 0) + { + pImageMemoryBarriers = (VkImageMemoryBarrier*)THSVS_TEMP_ALLOC(sizeof(VkImageMemoryBarrier) * imageMemoryBarrierCount); + + VkPipelineStageFlags tempSrcStageMask = 0; + VkPipelineStageFlags tempDstStageMask = 0; + for (uint32_t i = 0; i < imageBarrierCount; ++i) + { + thsvsGetVulkanImageMemoryBarrier(pImageBarriers[i], &tempSrcStageMask, &tempDstStageMask, &pImageMemoryBarriers[i]); + srcStageMask |= tempSrcStageMask; + dstStageMask |= tempDstStageMask; + } + } + + vkCmdWaitEvents( + commandBuffer, + eventCount, + pEvents, + srcStageMask, + dstStageMask, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers); + + THSVS_TEMP_FREE(pBufferMemoryBarriers); + THSVS_TEMP_FREE(pImageMemoryBarriers); +} + +#endif // THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION diff --git a/src/game/main.c b/src/game/main.c index ca262dd..030fb12 100644 --- a/src/game/main.c +++ b/src/game/main.c @@ -71,6 +71,21 @@ void Init(void) { .Finalize = PassFinalize}; rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass0", sizeof("pass0") - 1), &bind); rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass1", sizeof("pass1") - 1), &bind); + + rt_v2 vertices[] = { + { 0, 0.5}, + { 0.5, -0.5}, + {-0.5, -0.5} + }; + rt_buffer_info info = { + .type = RT_BUFFER_TYPE_VERTEX, + .usage = RT_BUFFER_USAGE_STATIC, + .size = sizeof(vertices), + .data = vertices, + }; + rt_buffer_handle buf; + g_renderer.CreateBuffers(1, &info, &buf); + #endif } diff --git a/src/gfx/gfx_framegraph.c b/src/gfx/gfx_framegraph.c index 3260763..7100361 100644 --- a/src/gfx/gfx_framegraph.c +++ b/src/gfx/gfx_framegraph.c @@ -25,7 +25,7 @@ typedef struct { unsigned int width; unsigned int height; unsigned int sample_count; - rt_gpu_semaphore_handle semaphore; + rt_gpu_semaphore_handle semaphores[3]; 
rt_render_target_handle api_render_target; } rt_render_target; @@ -309,6 +309,8 @@ static bool CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) { bool result = false; + unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); + /* TODO(Kevin): determine aliasing opportunities */ const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets); for (uint32_t i = 0; i < info->render_target_count; ++i) { @@ -339,13 +341,15 @@ CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_are } } + for (unsigned int j = 0; j < max_frames_in_flight; ++j) { char sem_name[128]; - rtSPrint(sem_name, 128, "%s - Semaphore", (name) ? name : "Unnamed RT"); + rtSPrint(sem_name, 128, "%s - Semaphore (%u)", (name) ? name : "Unnamed RT", j); rt_gpu_semaphore_info sem_info = { .initial_value = 0, .name = sem_name, }; - g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphore); + g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphores[j]); + } if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN || graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN || @@ -639,6 +643,8 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore(); rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore(); + unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); + unsigned int frame_index = frame_id % max_frames_in_flight; rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); if (!temp.arena) { @@ -819,6 +825,10 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr if (is_graphics_pass) { g_renderer.CmdEndPass(cmdbuf); } + for (uint32_t j = 0; j < write_count; j++) { + rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target); + g_renderer.CmdFlushRenderTargetWrite(cmdbuf, rt->api_render_target); + } rt_gpu_semaphore_handle *wait_semaphores = NULL, *signal_semaphores = NULL; uint64_t *wait_values = NULL, *signal_values = NULL; @@ -847,18 +857,18 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr for (uint32_t j = 0; j < read_count; ++j) { rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target); - wait_semaphores[*wait_count] = rt->semaphore; + wait_semaphores[*wait_count] = rt->semaphores[frame_index]; wait_values[*wait_count] = signal_value_base + execution_level; *wait_count += 1; } for (uint32_t j = 0; j < write_count; ++j) { rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target); - signal_semaphores[*signal_count] = rt->semaphore; + signal_semaphores[*signal_count] = rt->semaphores[frame_index]; signal_values[*signal_count] = signal_value_base + execution_level + 1; *signal_count += 1; if (signal_value_base >= 200) { - wait_semaphores[*wait_count] = rt->semaphore; + wait_semaphores[*wait_count] = rt->semaphores[frame_index]; wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1; *wait_count += 1; } diff --git a/src/gfx/gfx_main.c b/src/gfx/gfx_main.c index 12e63e4..fcebcfb 100644 --- a/src/gfx/gfx_main.c +++ b/src/gfx/gfx_main.c @@ -23,12 +23,15 @@ static rt_dynlib _renderer_lib; static bool _renderer_loaded = false; RT_DLLEXPORT -RT_CVAR_S(rt_Renderer, "Select the render backend. 
Available options: [vk, null], Default: vk", "vk"); +RT_CVAR_S(rt_Renderer, + "Select the render backend. Available options: [vk, null], Default: vk", + "vk"); #ifdef RT_STATIC_LIB extern void RT_RENDERER_API_FN(RegisterCVars)(void); extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *); extern void RT_RENDERER_API_FN(Shutdown)(void); +extern unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void); extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int); extern void RT_RENDERER_API_FN(EndFrame)(unsigned int); extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *); @@ -49,12 +52,17 @@ extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semapho extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle); extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void); extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void); +extern rt_result + RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *); +extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *); extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle, const rt_cmd_begin_pass_info *); extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle); extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle, rt_render_target_handle, rt_render_target_state); +extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle, + rt_render_target_handle); #endif extern rt_result InitFramegraphManager(void); @@ -84,6 +92,7 @@ static bool LoadRenderer(void) { RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn); RETRIEVE_SYMBOL(Init, rt_init_renderer_fn); RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn); + RETRIEVE_SYMBOL(GetMaxFramesInFlight, rt_get_max_frames_in_flight_fn); RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn); RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn); RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn); @@ -98,9 +107,12 @@ static bool LoadRenderer(void) { RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn); RETRIEVE_SYMBOL(GetSwapchainAvailableSemaphore, rt_get_swapchain_available_semaphore_fn); RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn); + RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn); + RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn); RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn); RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn); RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn); + RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn); } else { rtReportError("GFX", "Unsupported renderer backend: (%s) %s", @@ -113,6 +125,7 @@ static bool LoadRenderer(void) { g_renderer.RegisterCVars = &rtRenRegisterCVars; g_renderer.Init = &rtRenInit; g_renderer.Shutdown = &rtRenShutdown; + g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight; g_renderer.BeginFrame = &rtRenBeginFrame; g_renderer.EndFrame = &rtRenEndFrame; g_renderer.CompilePipeline = &rtRenCompilePipeline; @@ -127,9 +140,12 @@ static bool LoadRenderer(void) { g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue; g_renderer.GetSwapchainAvailableSemaphore = &rtRenGetSwapchainAvailableSemaphore; g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore; + g_renderer.CreateBuffers = &rtRenCreateBuffers; + g_renderer.DestroyBuffers = &rtRenDestroyBuffers; 
g_renderer.CmdBeginPass = &rtRenCmdBeginPass; g_renderer.CmdEndPass = &rtRenCmdEndPass; g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget; + g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite; #endif return true; } diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h index fb21b2a..89a4389 100644 --- a/src/gfx/renderer_api.h +++ b/src/gfx/renderer_api.h @@ -210,6 +210,7 @@ typedef struct rt_pipeline_info_s rt_pipeline_info; typedef void rt_register_renderer_cvars_fn(void); typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info); typedef void rt_shutdown_renderer_fn(void); +typedef unsigned int rt_get_max_frames_in_flight_fn(void); typedef void rt_begin_frame_fn(unsigned int frame_id); typedef void rt_end_frame_fn(unsigned int frame_id); typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info); @@ -239,11 +240,14 @@ typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf); typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf, rt_render_target_handle render_target, rt_render_target_state new_state); +typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf, + rt_render_target_handle render_target); typedef struct { rt_register_renderer_cvars_fn *RegisterCVars; rt_init_renderer_fn *Init; rt_shutdown_renderer_fn *Shutdown; + rt_get_max_frames_in_flight_fn *GetMaxFramesInFlight; rt_begin_frame_fn *BeginFrame; rt_end_frame_fn *EndFrame; rt_compile_pipeline_fn *CompilePipeline; @@ -265,6 +269,7 @@ typedef struct { rt_cmd_begin_pass_fn *CmdBeginPass; rt_cmd_end_pass_fn *CmdEndPass; rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget; + rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite; } rt_renderer_api; #define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name diff --git a/src/renderer/null/null.c b/src/renderer/null/null.c index 38de3fe..c179d9f 100644 --- a/src/renderer/null/null.c +++ b/src/renderer/null/null.c @@ -30,6 +30,10 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { void RT_RENDERER_API_FN(Shutdown)(void) { } +unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) { + return 2; +} + void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) { RT_UNUSED(frame_id); } @@ -132,4 +136,10 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd, RT_UNUSED(cmd); RT_UNUSED(target); RT_UNUSED(state); +} + +void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle, + rt_render_target_handle render_target) { + RT_UNUSED(cmdbuf_handle); + RT_UNUSED(render_target); } \ No newline at end of file diff --git a/src/renderer/vk/buffers.c b/src/renderer/vk/buffers.c index 831d02d..a298b06 100644 --- a/src/renderer/vk/buffers.c +++ b/src/renderer/vk/buffers.c @@ -1,5 +1,10 @@ +#include "command_buffers.h" #include "gpu.h" +#include "transfers.h" +#include "resources.h" + #include "gfx/renderer_api.h" + #include "runtime/config.h" #include "runtime/threading.h" @@ -9,14 +14,8 @@ RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. 
Default: 1024.", 1024); typedef struct rt_buffer_data_s { - VkBuffer buffer; - VmaAllocation allocation; - size_t size; - rt_buffer_usage usage; - rt_buffer_type type; - - rt_rwlock lock; - + rt_buffer data; + uint32_t version; struct rt_buffer_data_s *next_free; } rt_buffer_data; @@ -26,18 +25,22 @@ static rt_mutex *_list_lock; rt_result InitBufferManagement(void) { size_t n = (size_t)rt_VkMaxBufferCount.i; - _buffers = malloc(sizeof(rt_buffer_data) * n); + _buffers = calloc(n, sizeof(rt_buffer_data)); if (!_buffers) return RT_OUT_OF_MEMORY; + _first_free = &_buffers[1]; + for (size_t i = 1; i < n - 1; ++i) + _buffers[i].next_free = &_buffers[i + 1]; + _list_lock = rtCreateMutex(); return RT_SUCCESS; } void ShutdownBufferManagement(void) { for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) { - if (_buffers[i].buffer == VK_NULL_HANDLE) + if (_buffers[i].data.buffer == VK_NULL_HANDLE) continue; - vmaDestroyBuffer(g_gpu.allocator, _buffers[i].buffer, _buffers[i].allocation); - rtDestroyRWLock(&_buffers[i].lock); + vmaDestroyBuffer(g_gpu.allocator, _buffers[i].data.buffer, _buffers[i].data.allocation); + rtDestroyRWLock(&_buffers[i].data.lock); memset(&_buffers[i], 0, sizeof(_buffers[i])); } free(_buffers); @@ -47,6 +50,33 @@ void ShutdownBufferManagement(void) { void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers); +static void UploadViaMap(rt_buffer_data *buffer, const void *data, size_t size) { + rtLockWrite(&buffer->data.lock); + void *dev_mem = NULL; + if (vmaMapMemory(g_gpu.allocator, buffer->data.allocation, &dev_mem) != VK_SUCCESS) { + rtReportError("vk", "Unable to map buffer for upload"); + rtUnlockWrite(&buffer->data.lock); + return; + } + memcpy(dev_mem, data, size); + vmaUnmapMemory(g_gpu.allocator, buffer->data.allocation); + if (!buffer->data.coherent) + vmaFlushAllocation(g_gpu.allocator, buffer->data.allocation, 0, VK_WHOLE_SIZE); + rtUnlockWrite(&buffer->data.lock); +} + +/* Convenience function that decides between mapping or uploading via transfer buffer */ +static void UploadData(rt_buffer_data *buffer, const void *data, size_t size) { + if (buffer->data.mappable) + UploadViaMap(buffer, data, size); + else + rtUploadToBuffer(buffer->data.buffer, + buffer->data.allocation, + buffer->data.owner, + data, + size); +} + rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers) { @@ -59,11 +89,131 @@ rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count, rtRenDestroyBuffers(i, p_buffers); return RT_OUT_OF_MEMORY; } + _first_free = slot->next_free; rtUnlockMutex(_list_lock); + + VkBufferUsageFlags buffer_usage = 0; + switch (info->type) { + case RT_BUFFER_TYPE_VERTEX: + buffer_usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + break; + case RT_BUFFER_TYPE_INDEX: + buffer_usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + break; + case RT_BUFFER_TYPE_STORAGE: + buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + break; + case RT_BUFFER_TYPE_UNIFORM: + buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + break; + } + buffer_usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = info->size, + .usage = buffer_usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VmaMemoryUsage alloc_usage = 0; + VmaAllocationCreateFlags alloc_flags = 0; + switch (info->usage) { + case RT_BUFFER_USAGE_STATIC: + alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + alloc_flags = 0; + break; + case RT_BUFFER_USAGE_DYNAMIC: + 
alloc_usage = VMA_MEMORY_USAGE_AUTO; + alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; + break; + case RT_BUFFER_USAGE_TRANSIENT: + alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + break; + } + VmaAllocationCreateInfo alloc_info = {.usage = alloc_usage, .flags = alloc_flags}; + + VkResult res = vmaCreateBuffer(g_gpu.allocator, + &buffer_info, + &alloc_info, + &slot->data.buffer, + &slot->data.allocation, + NULL); + if (res != VK_SUCCESS) { + rtReportError("vk", "Failed to create a buffer: %u", res); + rtLockMutex(_list_lock); + slot->next_free = _first_free; + _first_free = slot; + rtUnlockMutex(_list_lock); + if (i > 0) + rtRenDestroyBuffers(i, p_buffers); + return RT_UNKNOWN_ERROR; + } + + rt_create_rwlock_result lock_res = rtCreateRWLock(); + if (!lock_res.ok) { + rtReportError("vk", "Failed to create lock for buffer."); + vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation); + rtLockMutex(_list_lock); + slot->next_free = _first_free; + _first_free = slot; + rtUnlockMutex(_list_lock); + if (i > 0) + rtRenDestroyBuffers(i, p_buffers); + return RT_UNKNOWN_ERROR; + } + + VkMemoryPropertyFlags properties; + vmaGetAllocationMemoryProperties(g_gpu.allocator, slot->data.allocation, &properties); + slot->data.mappable = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + slot->data.coherent = (properties & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; + + slot->data.owner = RT_VK_UNOWNED; + slot->data.state = RT_BUFFER_STATE_NOT_USED; + + if (info->data) + UploadData(slot, info->data, info->size); + + ptrdiff_t index = slot - _buffers; + p_buffers[i].index = (uint32_t)index; + p_buffers[i].version = slot->version; } return RT_SUCCESS; } void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) { + for (uint32_t i = 0; i < count; ++i) { + if (buffers[i].index >= (uint32_t)rt_VkMaxBufferCount.i) + continue; + rt_buffer_data *slot = &_buffers[buffers[i].index]; + if (slot->version != buffers[i].version) { + rtLog("vk", "Tried to destroy a buffer with an invalid handle (version mismatch)."); + continue; + } + rtLockWrite(&slot->data.lock); + slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION; + vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation); + slot->data.buffer = VK_NULL_HANDLE; + slot->data.allocation = VK_NULL_HANDLE; + rtUnlockWrite(&slot->data.lock); + rtDestroyRWLock(&slot->data.lock); + rtLockMutex(_list_lock); + slot->next_free = _first_free; + _first_free = slot; + rtUnlockMutex(_list_lock); + } } + +rt_buffer *rtGetBuffer(rt_buffer_handle handle) { + if (handle.index >= (uint32_t)rt_VkMaxBufferCount.i) + return NULL; + rt_buffer_data *slot = &_buffers[handle.index]; + if (slot->version != handle.version) { + rtLog("vk", "Tried to access a buffer with an invalid handle (version mismatch)."); + return NULL; + } + return &slot->data; +} \ No newline at end of file diff --git a/src/renderer/vk/command_buffers.c b/src/renderer/vk/command_buffers.c index 00721d8..2af469e 100644 --- a/src/renderer/vk/command_buffers.c +++ b/src/renderer/vk/command_buffers.c @@ -330,8 +330,11 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue, rtLog("vk", " - TODO: More Info"); rtLog("vk", "Wait Semaphores:"); for (uint32_t i = 0; i < wait_count; ++i) { - rtLog("vk", " - %u:%u Value %u", info->wait_semaphores[i].version, - 
info->wait_semaphores[i].index, info->wait_values[i]); + rtLog("vk", + " - %u:%u Value %u", + info->wait_semaphores[i].version, + info->wait_semaphores[i].index, + info->wait_values[i]); } rtLog("vk", "Signal Semaphores:"); for (uint32_t i = 0; i < signal_count; ++i) { @@ -417,7 +420,8 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer, const VkSemaphore *signal_semaphores, const uint32_t *signal_values, uint32_t signal_semaphore_count, - rt_gpu_queue queue) { + rt_gpu_queue queue, + VkFence fence) { rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); if (!temp.arena) @@ -476,7 +480,7 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer, .pCommandBufferInfos = &command_buffer_info, }; - if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) { + if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) { rtLog("vk", "vkQueueSubmit failed."); result = RT_UNKNOWN_ERROR; } diff --git a/src/renderer/vk/command_buffers.h b/src/renderer/vk/command_buffers.h index 9a26c57..b526498 100644 --- a/src/renderer/vk/command_buffers.h +++ b/src/renderer/vk/command_buffers.h @@ -19,6 +19,7 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer, const VkSemaphore *signal_semaphores, const uint32_t *signal_values, uint32_t signal_semaphore_count, - rt_gpu_queue queue); + rt_gpu_queue queue, + VkFence fence); #endif diff --git a/src/renderer/vk/commands.c b/src/renderer/vk/commands.c index 493146d..9368688 100644 --- a/src/renderer/vk/commands.c +++ b/src/renderer/vk/commands.c @@ -9,6 +9,13 @@ #include +#define USE_SIMPLE_SYNC_LIB 0 + +#if USE_SIMPLE_SYNC_LIB +#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h" +#include +#endif + /* Retrieve the VkCommandBuffer as varname, or return */ #define GET_CMDBUF(varname, handle) \ VkCommandBuffer varname = rtGetCommandBuffer((handle)); \ @@ -196,8 +203,8 @@ static void ExecuteRenderTargetBarrier(rt_render_target *rt, /* Determine access flags */ VkPipelineStageFlags2 src_stage = 0; VkPipelineStageFlags2 dst_stage = 0; - VkAccessFlags2 src_access = 0; - VkAccessFlags2 dst_access = 0; + VkAccessFlags2 src_access = 0; + VkAccessFlags2 dst_access = 0; if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) { src_access = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) @@ -265,7 +272,7 @@ static void DoLayoutTransition(rt_render_target *rt, uint32_t image_index, rt_render_target_state new_state, VkCommandBuffer cmdbuf) { - +#if !USE_SIMPLE_SYNC_LIB /* Determine old and new layout */ VkImageLayout old_layout; switch (rt->states[image_index]) { @@ -274,7 +281,7 @@ static void DoLayoutTransition(rt_render_target *rt, break; case RT_RENDER_TARGET_STATE_STORAGE_IMAGE: case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE: - old_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL; + old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; default: old_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -287,7 +294,7 @@ static void DoLayoutTransition(rt_render_target *rt, break; case RT_RENDER_TARGET_STATE_STORAGE_IMAGE: case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE: - new_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL; + new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; default: new_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -306,7 +313,7 @@ static void DoLayoutTransition(rt_render_target *rt, (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) ? 
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_COLOR_BIT; - + VkPipelineStageFlags2 src_stage = 0; VkPipelineStageFlags2 dst_stage = 0; /* Determine access flags */ @@ -323,23 +330,26 @@ static void DoLayoutTransition(rt_render_target *rt, VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */ - src_access = VK_ACCESS_2_SHADER_WRITE_BIT; - src_stage = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; + src_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT; + src_stage = + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; // VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + // VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; } if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) { dst_access = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) - ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT - : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; - dst_stage = - (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) + ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT + : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT; + dst_stage = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; - } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */ - dst_access = VK_ACCESS_2_SHADER_READ_BIT; - dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */ + dst_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT; + dst_stage = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; + ; } VkImageMemoryBarrier2 image_barrier = { @@ -372,7 +382,64 @@ static void DoLayoutTransition(rt_render_target *rt, #ifdef RT_DEBUG vkCmdEndDebugUtilsLabelEXT(cmdbuf); #endif +#else + ThsvsAccessType prev_access; + if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) { + if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) + prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE; + else + prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) { + prev_access = THSVS_ACCESS_NONE; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) { + prev_access = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) { + prev_access = THSVS_ACCESS_ANY_SHADER_READ_OTHER; + } + ThsvsAccessType next_accesses[2]; + uint32_t next_access_count = 0; + if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) { + if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) { + next_accesses[0] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ; + next_accesses[1] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE; + } else { + next_accesses[0] = THSVS_ACCESS_COLOR_ATTACHMENT_READ; + next_accesses[1] = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE; + } + next_access_count = 2; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) { + next_accesses[0] = THSVS_ACCESS_NONE; + next_access_count = 1; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) { + next_accesses[0] = 
THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER; + next_access_count = 1; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) { + next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_OTHER; + next_accesses[1] = THSVS_ACCESS_ANY_SHADER_WRITE; + next_access_count = 2; + } + VkImageAspectFlags aspect_mask = + (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) + ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT + : VK_IMAGE_ASPECT_COLOR_BIT; + ThsvsImageBarrier barrier = {0}; + barrier.image = rt->image[image_index]; + barrier.pPrevAccesses = &prev_access; + barrier.prevAccessCount = 1; + barrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL; + barrier.nextAccessCount = next_access_count; + barrier.pNextAccesses = next_accesses; + barrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL; + barrier.discardContents = false; + barrier.subresourceRange.aspectMask = aspect_mask; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + thsvsCmdPipelineBarrier(cmdbuf, NULL, 0, NULL, 1, &barrier); + +#endif rt->states[image_index] = new_state; } @@ -396,3 +463,48 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb else ExecuteRenderTargetBarrier(rt, image_index, cmdbuf); } + +void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle, + rt_render_target_handle render_target) { + GET_CMDBUF(cmdbuf, cmdbuf_handle) + uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight; + if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) { + image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index; + } + rt_render_target *rt = rtGetRenderTarget(render_target); + if (!rt) { + rtLog("vk", "Tried to flush invalid render target"); + return; + } + + VkAccessFlags2 src_access; + VkPipelineStageFlags2 src_stage; + if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) { + src_access = + (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) + ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT + : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; + src_stage = + (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) + ? 
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR + : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE){ /* SAMPLED_IMAGE or STORAGE_IMAGE */ + src_access = VK_ACCESS_2_MEMORY_WRITE_BIT; + src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + }else { + return; + } + + VkMemoryBarrier2 barrier = {.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcAccessMask = src_access, + .srcStageMask = src_stage, + .dstAccessMask = 0, + .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT}; + VkDependencyInfo dep = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = 1, + .pMemoryBarriers = &barrier, + }; + vkCmdPipelineBarrier2(cmdbuf, &dep); +} \ No newline at end of file diff --git a/src/renderer/vk/frame.c b/src/renderer/vk/frame.c index 9beec6b..94e664b 100644 --- a/src/renderer/vk/frame.c +++ b/src/renderer/vk/frame.c @@ -2,6 +2,7 @@ #include "gpu.h" #include "render_targets.h" #include "swapchain.h" +#include "transfers.h" #include "gfx/renderer_api.h" @@ -114,7 +115,8 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) { &frame->swapchain_transitioned, NULL, 1, - RT_GRAPHICS_QUEUE) != RT_SUCCESS) { + RT_GRAPHICS_QUEUE, + VK_NULL_HANDLE) != RT_SUCCESS) { rtReportError("vk", "Failed to submit the layout transition for the swapchain image."); return; } @@ -132,4 +134,6 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) { if (res != VK_SUCCESS) { rtReportError("vk", "vkQueuePresentKHR failed: %u", res); } + + rtFlushGPUTransfers(); } \ No newline at end of file diff --git a/src/renderer/vk/gpu.h b/src/renderer/vk/gpu.h index 65ab239..5c7b8ba 100644 --- a/src/renderer/vk/gpu.h +++ b/src/renderer/vk/gpu.h @@ -9,6 +9,9 @@ #include "gfx/renderer_api.h" +/* Used to mark a resource as not owned by a particular queue */ +#define RT_VK_UNOWNED 255 + /* Minimum supported value of g_gpu.max_frames_in_flight */ #define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2 @@ -97,6 +100,8 @@ VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count); VkQueue rtGetQueue(rt_gpu_queue queue); +uint32_t rtGetQueueFamily(rt_gpu_queue queue); + const char *rtVkFormatToString(VkFormat format); #endif diff --git a/src/renderer/vk/helper.c b/src/renderer/vk/helper.c index 28cb63f..e5a9aef 100644 --- a/src/renderer/vk/helper.c +++ b/src/renderer/vk/helper.c @@ -55,6 +55,20 @@ VkQueue rtGetQueue(rt_gpu_queue queue) { } } +uint32_t rtGetQueueFamily(rt_gpu_queue queue) { + switch (queue) { + case RT_GRAPHICS_QUEUE: + return g_gpu.graphics_family; + case RT_COMPUTE_QUEUE: + return g_gpu.compute_family; + case RT_TRANSFER_QUEUE: + return g_gpu.transfer_family; + default: + return UINT32_MAX; + } +} + + const char *rtVkFormatToString(VkFormat format) { switch (format) { case VK_FORMAT_R8G8B8A8_UNORM: diff --git a/src/renderer/vk/init.c b/src/renderer/vk/init.c index d6f6a63..7425fee 100644 --- a/src/renderer/vk/init.c +++ b/src/renderer/vk/init.c @@ -648,6 +648,8 @@ extern rt_result InitializeSempahoreManagement(void); extern void ShutdownSemaphoreManagement(void); extern rt_result InitBufferManagement(void); extern void ShutdownBufferManagement(void); +extern rt_result InitializeTransfers(void); +extern void ShutdownTransfers(void); rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { rtLog("vk", "Init"); @@ -697,6 +699,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { if (res != RT_SUCCESS) return res; res = 
InitBufferManagement(); + if (res != RT_SUCCESS) + return res; + res = InitializeTransfers(); if (res != RT_SUCCESS) return res; res = rtCreateSwapchain(); @@ -711,6 +716,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) { rtLog("vk", "Shutdown"); vkDeviceWaitIdle(g_gpu.device); rtDestroySwapchain(); + ShutdownTransfers(); ShutdownBufferManagement(); ShutdownCommandBufferManagement(); ShutdownSemaphoreManagement(); @@ -725,3 +731,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) { #endif vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb); } + +unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) { + return g_gpu.max_frames_in_flight; +} \ No newline at end of file diff --git a/src/renderer/vk/meson.build b/src/renderer/vk/meson.build index f830922..a267a87 100644 --- a/src/renderer/vk/meson.build +++ b/src/renderer/vk/meson.build @@ -15,6 +15,7 @@ if vk_dep.found() 'pipelines.h', 'render_targets.h', 'swapchain.h', + 'transfers.h', 'buffers.c', 'command_buffers.c', @@ -26,6 +27,9 @@ if vk_dep.found() 'pipelines.c', 'render_targets.c', 'swapchain.c', + 'transfers.c', + + 'simple_sync_impl.cpp', # Contrib Sources '../../../contrib/volk/volk.h', diff --git a/src/renderer/vk/resources.h b/src/renderer/vk/resources.h new file mode 100644 index 0000000..10f2136 --- /dev/null +++ b/src/renderer/vk/resources.h @@ -0,0 +1,80 @@ +#ifndef RT_VK_RESOURCES_H +#define RT_VK_RESOURCES_H + +/* Buffers and images */ + +#include "gpu.h" + +#include "runtime/threading.h" + +typedef enum { + RT_BUFFER_STATE_INVALID, + + RT_BUFFER_STATE_NOT_USED, + + RT_BUFFER_STATE_IN_USE, + + RT_BUFFER_STATE_IN_TRANSFER, +} rt_buffer_state; + +typedef struct { + VkBuffer buffer; + VmaAllocation allocation; + size_t size; + rt_buffer_usage usage; + rt_buffer_type type; + rt_buffer_state state; + rt_rwlock lock; + + bool mappable; + bool coherent; + + rt_gpu_queue owner; +} rt_buffer; + + +rt_buffer *rtGetBuffer(rt_buffer_handle handle); + +/* Helper functions for accessing buffers */ + +RT_INLINE rt_gpu_queue rtGetBufferOwner(rt_buffer_handle handle) { + rt_buffer *buffer = rtGetBuffer(handle); + rt_gpu_queue owner = RT_VK_UNOWNED; + if (buffer) { + rtLockRead(&buffer->lock); + owner = buffer->owner; + rtUnlockRead(&buffer->lock); + } + return owner; +} + +RT_INLINE void rtSetBufferOwner(rt_buffer_handle handle, rt_gpu_queue owner) { + rt_buffer *buffer = rtGetBuffer(handle); + if (buffer) { + rtLockWrite(&buffer->lock); + buffer->owner = owner; + rtUnlockWrite(&buffer->lock); + } +} + +RT_INLINE rt_buffer_state rtGetBufferState(rt_buffer_handle handle) { + rt_buffer *buffer = rtGetBuffer(handle); + rt_buffer_state state = RT_BUFFER_STATE_INVALID; + if (buffer) { + rtLockRead(&buffer->lock); + state = buffer->state; + rtUnlockRead(&buffer->lock); + } + return state; +} + +RT_INLINE void rtSetBufferState(rt_buffer_handle handle, rt_buffer_state state) { + rt_buffer *buffer = rtGetBuffer(handle); + if (buffer) { + rtLockWrite(&buffer->lock); + buffer->state = state; + rtUnlockWrite(&buffer->lock); + } +} + +#endif diff --git a/src/renderer/vk/simple_sync_impl.cpp b/src/renderer/vk/simple_sync_impl.cpp new file mode 100644 index 0000000..db42d61 --- /dev/null +++ b/src/renderer/vk/simple_sync_impl.cpp @@ -0,0 +1,6 @@ +#include "gpu.h" + +extern "C" { +#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION +#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h" +} \ No newline at end of file diff --git a/src/renderer/vk/transfers.c b/src/renderer/vk/transfers.c new file mode 100644 index 0000000..403a256 
--- /dev/null +++ b/src/renderer/vk/transfers.c @@ -0,0 +1,263 @@ +#include "transfers.h" +#include "command_buffers.h" + +#include "runtime/config.h" +#include "runtime/mem_arena.h" +#include "runtime/threading.h" + +#include + +RT_CVAR_I(rt_VkTransferSlotCount, + "Number of available transfer slots per frame. Default: 512", + 512); + +/* This is a temporary solution. We probably should keep a pool of buffers + * to avoid re-creating the buffers all the time. */ + +typedef struct { + VkBuffer buffer; + VmaAllocation allocation; + bool requires_flush; +} rt_transfer_buffer; + +typedef struct { + rt_transfer_buffer tbuf; + VkFence fence; + VkSemaphore ownership_transfer; +} rt_transfer; + +static rt_transfer *_transfers; +static uint32_t _transfer_count; +static rt_mutex *_transfer_lock; + +static rt_transfer_buffer AcquireTransferBuffer(size_t size) { + rt_transfer_buffer tbuf = {VK_NULL_HANDLE}; + + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + VmaAllocationCreateInfo alloc_info = { + .usage = VMA_MEMORY_USAGE_AUTO, + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT, + }; + + if (vmaCreateBuffer(g_gpu.allocator, + &buffer_info, + &alloc_info, + &tbuf.buffer, + &tbuf.allocation, + NULL) == VK_SUCCESS) { + VkMemoryPropertyFlags props; + vmaGetAllocationMemoryProperties(g_gpu.allocator, tbuf.allocation, &props); + tbuf.requires_flush = (props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0; + } + return tbuf; +} + +static void ReturnTransferBuffer(rt_transfer_buffer buffer) { + vmaDestroyBuffer(g_gpu.allocator, buffer.buffer, buffer.allocation); +} + +static void CopyToTransferBuffer(rt_transfer_buffer buffer, const void *data, size_t n) { + void *tmem = NULL; + vmaMapMemory(g_gpu.allocator, buffer.allocation, &tmem); + RT_ASSERT(tmem, "Transfer Buffer memory must be mappable."); + memcpy(tmem, data, n); + vmaUnmapMemory(g_gpu.allocator, buffer.allocation); + if (buffer.requires_flush) + vmaFlushAllocation(g_gpu.allocator, buffer.allocation, 0, n); +} + +rt_result InitializeTransfers(void) { + _transfer_lock = rtCreateMutex(); + if (!_transfer_lock) + return RT_UNKNOWN_ERROR; + _transfers = calloc((size_t)rt_VkTransferSlotCount.i, sizeof(rt_transfer)); + if (!_transfers) { + rtDestroyMutex(_transfer_lock); + return RT_OUT_OF_MEMORY; + } + _transfer_count = 0; + return RT_SUCCESS; +} + +void ShutdownTransfers(void) { + rtDestroyMutex(_transfer_lock); + for (int i = 0; i < rt_VkTransferSlotCount.i; ++i) { + if (_transfers[i].fence) + vkDestroyFence(g_gpu.device, _transfers[i].fence, g_gpu.alloc_cb); + } + free(_transfers); +} + +#define TRANSFER_FAILED -1 +#define TRANSFER_NOT_NEEDED 0 +#define TRANSFER_STARTED 1 + +static int AcquireBufferOwnership(rt_transfer *transfer, + VkBuffer buffer, + rt_gpu_queue current_owner, + VkCommandBuffer transfer_cmd) { + if (!transfer->ownership_transfer) { + VkSemaphoreCreateInfo sem_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + if (vkCreateSemaphore(g_gpu.device, + &sem_info, + g_gpu.alloc_cb, + &transfer->ownership_transfer) != VK_SUCCESS) { + rtReportError("vk", "Failed to create an ownership transfer semaphore."); + return TRANSFER_FAILED; + } + } + + uint32_t src_family = rtGetQueueFamily(current_owner); + uint32_t dst_family = rtGetQueueFamily(RT_TRANSFER_QUEUE); + if (src_family == dst_family) + return TRANSFER_NOT_NEEDED; + + VkCommandBuffer cmd = 
+    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(current_owner);
+    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+    vkBeginCommandBuffer(cmd, &begin_info);
+    VkBufferMemoryBarrier2 release_barrier = {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .buffer = buffer,
+        .offset = 0,
+        .size = VK_WHOLE_SIZE,
+        .srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
+        .srcAccessMask = 0,
+        .srcQueueFamilyIndex = src_family,
+        .dstQueueFamilyIndex = dst_family,
+    };
+    VkDependencyInfo dep = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+                            .pBufferMemoryBarriers = &release_barrier,
+                            .bufferMemoryBarrierCount = 1};
+    vkCmdPipelineBarrier2(cmd, &dep);
+    vkEndCommandBuffer(cmd);
+
+    VkBufferMemoryBarrier2 acquire_barrier = {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .buffer = buffer,
+        .offset = 0,
+        .size = VK_WHOLE_SIZE,
+        .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
+        .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+        .srcQueueFamilyIndex = src_family,
+        .dstQueueFamilyIndex = dst_family,
+    };
+    VkDependencyInfo dep2 = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+                             .pBufferMemoryBarriers = &acquire_barrier,
+                             .bufferMemoryBarrierCount = 1};
+    vkCmdPipelineBarrier2(transfer_cmd, &dep2);
+
+    /* Only transfer the ownership when the frame is finished */
+    VkSemaphore wait_semaphore = VK_NULL_HANDLE;
+    rt_frame_data *frame = rtGetFrameData(g_gpu.current_frame_id);
+    wait_semaphore = frame->render_finished;
+
+    uint32_t dummy = 0;
+    if (rtSubmitSingleCommandBuffer(cmd,
+                                    &wait_semaphore,
+                                    &dummy,
+                                    1,
+                                    &transfer->ownership_transfer,
+                                    &dummy,
+                                    1,
+                                    current_owner,
+                                    VK_NULL_HANDLE) != RT_SUCCESS)
+        return TRANSFER_FAILED;
+    return TRANSFER_STARTED;
+}
+
+rt_result rtUploadToBuffer(VkBuffer buffer,
+                           VmaAllocation allocation,
+                           rt_gpu_queue current_owner,
+                           const void *data,
+                           size_t nbytes) {
+    rtLockMutex(_transfer_lock);
+    rt_transfer *transfer =
+        (int)_transfer_count < rt_VkTransferSlotCount.i ? &_transfers[_transfer_count++] : NULL;
+    rtUnlockMutex(_transfer_lock);
+    if (!transfer)
+        return RT_NO_TRANSFER_SLOTS;
+
+    transfer->tbuf = AcquireTransferBuffer(nbytes);
+    if (!transfer->tbuf.buffer) {
+        return RT_OUT_OF_MEMORY;
+    }
+
+    CopyToTransferBuffer(transfer->tbuf, data, nbytes);
+
+    if (!transfer->fence) {
+        VkFenceCreateInfo fence_info = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+        };
+        if (vkCreateFence(g_gpu.device, &fence_info, g_gpu.alloc_cb, &transfer->fence) !=
+            VK_SUCCESS) {
+            return RT_UNKNOWN_ERROR;
+        }
+    }
+
+    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
+    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+    vkBeginCommandBuffer(cmd, &begin_info);
+
+    bool requires_ownership_transfer =
+        (current_owner != RT_TRANSFER_QUEUE && current_owner != RT_VK_UNOWNED);
+    if (requires_ownership_transfer) {
+        int did_transfer = AcquireBufferOwnership(transfer, buffer, current_owner, cmd);
+        if (did_transfer == TRANSFER_FAILED)
+            return RT_UNKNOWN_ERROR;
+        else if (did_transfer == TRANSFER_NOT_NEEDED)
+            requires_ownership_transfer = false;
+    }
+    VkBufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = nbytes};
+    vkCmdCopyBuffer(cmd, transfer->tbuf.buffer, buffer, 1, &region);
+    vkEndCommandBuffer(cmd);
+
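+    /* Submit the copy on the transfer queue: wait on the ownership-transfer
+     * semaphore if an acquire barrier was recorded above, and signal the
+     * per-transfer fence that rtFlushGPUTransfers() waits on. */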
+    uint32_t dummy = 0;
+    return rtSubmitSingleCommandBuffer(cmd,
+                                       requires_ownership_transfer ? &transfer->ownership_transfer
+                                                                   : NULL,
+                                       requires_ownership_transfer ? &dummy : NULL,
+                                       requires_ownership_transfer ? 1 : 0,
+                                       NULL,
+                                       NULL,
+                                       0,
+                                       RT_TRANSFER_QUEUE,
+                                       transfer->fence);
+}
+
+/* Wait until transfers to gpu resources are finished. */
+void rtFlushGPUTransfers(void) {
+    if (_transfer_count == 0)
+        return;
+    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
+    RT_ASSERT(temp.arena, "Could not get a temporary arena for flushing gpu transfers.");
+    rtLockMutex(_transfer_lock);
+    VkFence *fences = RT_ARENA_PUSH_ARRAY(temp.arena, VkFence, _transfer_count);
+    if (!fences) {
+        rtReportError("vk", "Failed to allocate fences array for flushing gpu transfers.");
+        rtUnlockMutex(_transfer_lock);
+        return;
+    }
+    uint32_t count = 0;
+    for (uint32_t i = 0; i < _transfer_count; ++i) {
+        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
+            continue;
+        fences[count++] = _transfers[i].fence;
+    }
+    vkWaitForFences(g_gpu.device, count, fences, VK_TRUE, UINT64_MAX);
+    for (uint32_t i = 0; i < _transfer_count; ++i) {
+        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
+            continue;
+        ReturnTransferBuffer(_transfers[i].tbuf);
+    }
+    _transfer_count = 0;
+    rtUnlockMutex(_transfer_lock);
+}
diff --git a/src/renderer/vk/transfers.h b/src/renderer/vk/transfers.h
new file mode 100644
index 0000000..d646c96
--- /dev/null
+++ b/src/renderer/vk/transfers.h
@@ -0,0 +1,16 @@
+#ifndef RT_VK_TRANSFERS_H
+#define RT_VK_TRANSFERS_H
+
+#include "gpu.h"
+#include "runtime/runtime.h"
+
+enum {
+    RT_NO_TRANSFER_SLOTS = RT_CUSTOM_ERROR_START,
+};
+
+rt_result rtUploadToBuffer(VkBuffer buffer, VmaAllocation allocation, rt_gpu_queue current_owner, const void *data, size_t nbytes);
+
+/* Wait until transfers to gpu resources are finished. */
+void rtFlushGPUTransfers(void);
+
+#endif
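
For reference, a minimal usage sketch of the transfer path added above. The function and the names `dst`, `dst_alloc` and `vertex_data` are hypothetical, and the destination buffer is assumed not to be owned by another queue:

rt_result UploadVertexData(VkBuffer dst, VmaAllocation dst_alloc,
                           const void *vertex_data, size_t nbytes) {
    /* Hypothetical caller: stages the data, records the copy and submits it
     * on the transfer queue. */
    rt_result res = rtUploadToBuffer(dst, dst_alloc, RT_VK_UNOWNED, vertex_data, nbytes);
    if (res != RT_SUCCESS)
        return res; /* e.g. RT_NO_TRANSFER_SLOTS when all slots are taken */

    /* Block until all pending transfer fences have signaled and the staging
     * buffers have been returned. */
    rtFlushGPUTransfers();
    return RT_SUCCESS;
}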