dump state

this will be the basis of the framegraph rewrite, because the current state is fucked

parent 6f89dd4c46
commit 3bc192b281
contrib/simple_vulkan_synchronization/LICENSE.md (new file, 19 lines)
@@ -0,0 +1,19 @@
Copyright (c) 2017 Tobias Hector

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
contrib/simple_vulkan_synchronization/README.md (new file, 153 lines)
@@ -0,0 +1,153 @@
# Simplified Vulkan Synchronization

In an effort to make Vulkan synchronization more accessible, I created this
stb-inspired single-header library in order to somewhat simplify the core
synchronization mechanisms in Vulkan - pipeline barriers and events.

Rather than the complex maze of enums and bitflags in Vulkan - many
combinations of which are invalid or nonsensical - this library collapses
them to a much shorter list of 40 distinct usage types, and a couple of
options for handling image layouts.

Other synchronization mechanisms such as semaphores, fences and render
passes are not addressed by this API at present.
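For a flavour of what this looks like in practice, here is a minimal sketch
(mirroring the first test in `test/tests.c` below): a single pair of access
types stands in for the whole stage/access/layout tuple, and the library
expands it into the corresponding Vulkan masks.

```c
ThsvsAccessType prevAccess = THSVS_ACCESS_COMPUTE_SHADER_WRITE;
ThsvsAccessType nextAccess = THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER;
ThsvsGlobalBarrier barrier = {1, &prevAccess, 1, &nextAccess};

VkMemoryBarrier vkBarrier = { 0 };
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
/* Expands to src/dst stage VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT and
 * access masks VK_ACCESS_SHADER_WRITE_BIT -> VK_ACCESS_SHADER_READ_BIT. */
thsvsGetVulkanMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier);
```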
## Usage

#define the symbol THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION in
*one* C/C++ file before the #include of the header; the implementation
will be generated in that file.
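For example, a minimal sketch (the header path is assumed to be on the
include path):

```c
/* Exactly one translation unit defines the implementation macro: */
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "thsvs_simpler_vulkan_synchronization.h"
```

Every other file that uses the library includes the header without the
#define.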
## Version

alpha.9

Alpha.9 adds the thsvsGetAccessInfo function to translate access types into a
ThsvsVkAccessInfo.

## Version History

alpha.8

Alpha.8 adds a host preinitialization state for linear images, as well as a
number of new access sets for extensions released since the last update.

alpha.7

Alpha.7 incorporates a number of fixes from @gwihlidal, and fixes
handling of pipeline stages in the presence of multiple access types or
barriers in light of other recent changes.

alpha.6

Alpha.6 fixes a typo (VK_ACCESS_TYPE_MEMORY_READ|WRITE_BIT should have been
VK_ACCESS_MEMORY_READ|WRITE_BIT), and sets the pipeline stage src and dst flag
bits to VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT and
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT during initialization, not 0 as per
alpha.5.

alpha.5

Alpha.5 now correctly zeroes out the pipeline stage flags before trying to
incrementally set bits on them... common theme here, whoops.

alpha.4

Alpha.4 now correctly zeroes out the access types before trying to
incrementally set bits on them (!)

alpha.3

Alpha.3 changes the following:

Uniform and vertex buffer access in one enum, matching
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER:
- THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER_OR_VERTEX_BUFFER

Color read *and* write access, matching D3D12_RESOURCE_STATE_RENDER_TARGET:
- THSVS_ACCESS_COLOR_ATTACHMENT_READ_WRITE

Also the "THSVS_ACCESS_\*\_SHADER_READ_SAMPLED_IMAGE" enums have been renamed to the form "THSVS_ACCESS_\*\_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER"

alpha.2

Alpha.2 adds four new resource states for "ANY SHADER ACCESS":
- THSVS_ACCESS_ANY_SHADER_READ_UNIFORM_BUFFER
- THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE
- THSVS_ACCESS_ANY_SHADER_READ_OTHER
- THSVS_ACCESS_ANY_SHADER_WRITE

alpha.1

Alpha.1 adds three new resource states:
- THSVS_ACCESS_GENERAL (Any access on the device)
- THSVS_ACCESS_DEPTH_ATTACHMENT_WRITE_STENCIL_READ_ONLY (Write access to only the depth aspect of a depth/stencil attachment)
- THSVS_ACCESS_STENCIL_ATTACHMENT_WRITE_DEPTH_READ_ONLY (Write access to only the stencil aspect of a depth/stencil attachment)

It also fixes a couple of typos, and adds clarification as to when extensions
need to be enabled to use a feature.

alpha.0

This is the very first public release of this library; future revisions
may change the API in an incompatible manner as feedback is received.
Once the version becomes stable, incompatible changes will only be made
in major revisions of the API - minor revisions will only contain
bugfixes or minor additions.
## Memory Allocation

The thsvsCmdPipelineBarrier and thsvsCmdWaitEvents commands allocate
temporary storage for the Vulkan barrier equivalents in order to pass them
to the respective Vulkan commands.

These use the `THSVS_TEMP_ALLOC(size)` and `THSVS_TEMP_FREE(x)` macros,
which are by default set to alloca(size) and ((void)(x)), respectively.
If you don't want to use stack space or would rather use your own
allocation strategy, these can be overridden by defining these macros
before #include-ing the header file with
THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION defined.
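For instance, routing the temporary storage through the C heap instead of
the stack (a minimal sketch; any allocator with malloc/free semantics
works):

```c
#include <stdlib.h>

#define THSVS_TEMP_ALLOC(size) malloc(size)
#define THSVS_TEMP_FREE(x)     free(x)
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "thsvs_simpler_vulkan_synchronization.h"
```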
I'd rather avoid the need for these allocations in what are likely to be
high-traffic commands, but for now I just want to ship something - I may
revisit this at a future date based on feedback.
## Expressiveness Compared to Raw Vulkan

Despite the fact that this API is fairly simple, it expresses 99% of
what you'd actually ever want to do in practice.
Adding the missing expressiveness would result in increased complexity
which didn't seem worth the tradeoff - however, I would consider adding
something for these cases in the future if it becomes an issue.

Here's a list of known things you can't express:

* Execution-only dependencies cannot be expressed.
  These are occasionally useful in conjunction with semaphores, or when
  trying to be clever with scheduling - but their usage is both limited
  and fairly tricky to get right anyway.
* Depth/stencil input attachments can be read in a shader using either
  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL or
  VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL - this library
  *always* uses VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL.
  It's possible (though highly unlikely) when aliasing images that this
  results in unnecessary transitions.
## Error Checks

By default, as with the Vulkan API, this library does NOT check for
errors.
However, a number of optional error checks (`THSVS_ERROR_CHECK_*`) can be
enabled by uncommenting the relevant #defines.
Currently, error checks simply assert at the point a failure is detected
and do not output an error message.
I certainly do not claim they capture *all* possible errors, but they
capture what should be some of the more common ones.
Use of the Vulkan Validation Layers in tandem with this library is
strongly recommended:
https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers
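As an illustration (assuming the checks ship as commented-out `#define`s
near the top of the header; consult the header for the exact names -
`THSVS_ERROR_CHECK_POTENTIAL_HAZARD` is used here as an example):

```c
/* Inside thsvs_simpler_vulkan_synchronization.h (name assumed): */
/* Before - the check is disabled: */
// #define THSVS_ERROR_CHECK_POTENTIAL_HAZARD
/* After - uncommented, the check asserts when a potential hazard is detected: */
#define THSVS_ERROR_CHECK_POTENTIAL_HAZARD
```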
## Issues

This header was clean of warnings using -Wall as of the time of publishing
on both gcc 4.8.4 and clang 3.5, using the c99 standard.

There's a potential pitfall in thsvsCmdPipelineBarrier and thsvsCmdWaitEvents
where alloca is used for temporary allocations. See
[Memory Allocation](#memory-allocation) for more information.

Testing of this library is so far extremely limited, with no immediate
plans to add to that - so there are bound to be some bugs.
Please raise these issues on the repo issue tracker, or provide a fix
via a pull request yourself if you're so inclined.
contrib/simple_vulkan_synchronization/test/README.md (new file, 26 lines)
@@ -0,0 +1,26 @@
# Tests

`tests.c` defines a number of unit tests to check that various scenarios
produce the desired output.
Tests are based on the common synchronization examples on the Vulkan-Docs
wiki: https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples.

## Building

On a Unix-based system these tests can be built using:

`gcc -o tests tests.c -lvulkan`

## Running

Running is straightforward:

`./tests`

The executable will write out the tests that are run, whether they pass or
fail, and what caused them to fail if they did.

## Adding tests

If you'd like to add a test, just define a new test in main() as per those
that already exist; a sketch of one possible addition follows.
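A minimal sketch of such an addition (a hypothetical test; the expected
masks assume the transfer access types map onto the transfer stage, as in
the existing transfer tests in `tests.c`):

```c
/* Hypothetical extra test: transfer write followed by transfer read. */
global_barrier_test("Transfer write to buffer, Transfer read from buffer",
                    THSVS_ACCESS_TRANSFER_WRITE,
                    THSVS_ACCESS_TRANSFER_READ,
                    VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_ACCESS_TRANSFER_WRITE_BIT,
                    VK_ACCESS_TRANSFER_READ_BIT);
```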
contrib/simple_vulkan_synchronization/test/tests.c (new file, 357 lines)
@@ -0,0 +1,357 @@
// Copyright (c) 2017-2019 Tobias Hector

// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#include <vulkan/vulkan.h>
#include <stdio.h>

#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "../thsvs_simpler_vulkan_synchronization.h"

void global_barrier_test_array(const char* testName,
                               unsigned int numPrevAccesses,
                               ThsvsAccessType* prevAccesses,
                               unsigned int numNextAccesses,
                               ThsvsAccessType* nextAccesses,
                               VkPipelineStageFlags expectedSrcStageMask,
                               VkPipelineStageFlags expectedDstStageMask,
                               VkAccessFlags expectedSrcAccessMask,
                               VkAccessFlags expectedDstAccessMask)
{
    ThsvsGlobalBarrier barrier = {numPrevAccesses, prevAccesses, numNextAccesses, nextAccesses};

    VkMemoryBarrier vkBarrier = { 0 };
    VkPipelineStageFlags srcStages = 0;
    VkPipelineStageFlags dstStages = 0;
    unsigned int testPassed = 1;

    thsvsGetVulkanMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier);

    printf("Test: %s\n", testName);

    if (srcStages != expectedSrcStageMask)
    {
        printf("\tUnexpected source stage %0#10X\n", srcStages);
        testPassed = 0;
    }

    if (dstStages != expectedDstStageMask)
    {
        printf("\tUnexpected destination stage %0#10X\n", dstStages);
        testPassed = 0;
    }

    if (vkBarrier.srcAccessMask != expectedSrcAccessMask)
    {
        printf("\tUnexpected source access mask %0#10X\n", vkBarrier.srcAccessMask);
        testPassed = 0;
    }

    if (vkBarrier.dstAccessMask != expectedDstAccessMask)
    {
        printf("\tUnexpected destination access mask %0#10X\n", vkBarrier.dstAccessMask);
        testPassed = 0;
    }

    if (testPassed == 1)
        printf("\tPASSED\n");
    else
        printf("\tFAILED\n");
}

void global_barrier_test(const char* testName,
                         ThsvsAccessType prevAccess,
                         ThsvsAccessType nextAccess,
                         VkPipelineStageFlags expectedSrcStageMask,
                         VkPipelineStageFlags expectedDstStageMask,
                         VkAccessFlags expectedSrcAccessMask,
                         VkAccessFlags expectedDstAccessMask)
{
    global_barrier_test_array(testName, 1, &prevAccess, 1, &nextAccess, expectedSrcStageMask, expectedDstStageMask, expectedSrcAccessMask, expectedDstAccessMask);
}

void image_barrier_test_array(const char* testName,
                              unsigned int numPrevAccesses,
                              ThsvsAccessType* prevAccesses,
                              unsigned int numNextAccesses,
                              ThsvsAccessType* nextAccesses,
                              VkPipelineStageFlags expectedSrcStageMask,
                              VkPipelineStageFlags expectedDstStageMask,
                              VkAccessFlags expectedSrcAccessMask,
                              VkAccessFlags expectedDstAccessMask,
                              VkImageLayout expectedOldLayout,
                              VkImageLayout expectedNewLayout)
{
    ThsvsImageBarrier barrier = {numPrevAccesses, prevAccesses, numNextAccesses, nextAccesses};

    VkImageMemoryBarrier vkBarrier = { 0 };
    VkPipelineStageFlags srcStages = 0;
    VkPipelineStageFlags dstStages = 0;
    unsigned int testPassed = 1;

    thsvsGetVulkanImageMemoryBarrier(barrier, &srcStages, &dstStages, &vkBarrier);

    printf("Test: %s\n", testName);

    if (srcStages != expectedSrcStageMask)
    {
        printf("\tUnexpected source stage %0#10X\n", srcStages);
        testPassed = 0;
    }

    if (dstStages != expectedDstStageMask)
    {
        printf("\tUnexpected destination stage %0#10X\n", dstStages);
        testPassed = 0;
    }

    if (vkBarrier.srcAccessMask != expectedSrcAccessMask)
    {
        printf("\tUnexpected source access mask %0#10X\n", vkBarrier.srcAccessMask);
        testPassed = 0;
    }

    if (vkBarrier.dstAccessMask != expectedDstAccessMask)
    {
        printf("\tUnexpected destination access mask %0#10X\n", vkBarrier.dstAccessMask);
        testPassed = 0;
    }

    if (vkBarrier.oldLayout != expectedOldLayout)
    {
        printf("\tUnexpected old layout %d\n", vkBarrier.oldLayout);
        testPassed = 0;
    }

    if (vkBarrier.newLayout != expectedNewLayout)
    {
        printf("\tUnexpected new layout %d\n", vkBarrier.newLayout);
        testPassed = 0;
    }

    if (testPassed == 1)
        printf("\tPASSED\n");
    else
        printf("\tFAILED\n");
}

void image_barrier_test(const char* testName,
                        ThsvsAccessType prevAccess,
                        ThsvsAccessType nextAccess,
                        VkPipelineStageFlags expectedSrcStageMask,
                        VkPipelineStageFlags expectedDstStageMask,
                        VkAccessFlags expectedSrcAccessMask,
                        VkAccessFlags expectedDstAccessMask,
                        VkImageLayout expectedOldLayout,
                        VkImageLayout expectedNewLayout)
{
    image_barrier_test_array(testName, 1, &prevAccess, 1, &nextAccess, expectedSrcStageMask, expectedDstStageMask, expectedSrcAccessMask, expectedDstAccessMask, expectedOldLayout, expectedNewLayout);
}

int main(int argc, char* argv[])
{
    global_barrier_test("Compute write to storage buffer/image, Compute read from storage buffer/image",
                        THSVS_ACCESS_COMPUTE_SHADER_WRITE,
                        THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_ACCESS_SHADER_READ_BIT);

    global_barrier_test("Compute read from storage buffer, Compute write from storage buffer",
                        THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER,
                        THSVS_ACCESS_COMPUTE_SHADER_WRITE,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        0,
                        0);

    global_barrier_test("Compute write to storage buffer, Graphics read as index buffer",
                        THSVS_ACCESS_COMPUTE_SHADER_WRITE,
                        THSVS_ACCESS_INDEX_BUFFER,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_ACCESS_INDEX_READ_BIT);

    {
        ThsvsAccessType prevAccesses[] = {THSVS_ACCESS_COMPUTE_SHADER_WRITE};
        ThsvsAccessType nextAccesses[] = {THSVS_ACCESS_INDEX_BUFFER, THSVS_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER};
        global_barrier_test_array("Compute write to storage buffer, Graphics read as index buffer & Compute read as uniform buffer",
                                  1, prevAccesses,
                                  2, nextAccesses,
                                  VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                  VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                  VK_ACCESS_SHADER_WRITE_BIT,
                                  VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT);
    }

    global_barrier_test("Compute write to storage buffer, Graphics read as indirect buffer",
                        THSVS_ACCESS_COMPUTE_SHADER_WRITE,
                        THSVS_ACCESS_INDIRECT_BUFFER,
                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_ACCESS_INDIRECT_COMMAND_READ_BIT);

    image_barrier_test("Compute write to storage image, Graphics fragment read as sampled image",
                       THSVS_ACCESS_COMPUTE_SHADER_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_SHADER_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_GENERAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    {
        ThsvsAccessType prevAccesses[] = {THSVS_ACCESS_COMPUTE_SHADER_WRITE};
        ThsvsAccessType nextAccesses[] = {THSVS_ACCESS_INDIRECT_BUFFER, THSVS_ACCESS_FRAGMENT_SHADER_READ_UNIFORM_BUFFER};
        global_barrier_test_array("Compute write to storage texel buffer, Graphics read as indirect buffer & fragment read as uniform buffer",
                                  1, prevAccesses,
                                  2, nextAccesses,
                                  VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                  VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                                  VK_ACCESS_SHADER_WRITE_BIT,
                                  VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT);
    }

    image_barrier_test("Graphics write to color attachment, Compute read from sampled image",
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to depth attachment, Compute read from sampled image",
                       THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE,
                       THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to depth attachment, Graphics fragment read from input attachment",
                       THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_DEPTH_STENCIL_INPUT_ATTACHMENT,
                       VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
                       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to depth attachment, Graphics fragment read from sampled image",
                       THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to color attachment, Graphics fragment read from input attachment",
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_COLOR_INPUT_ATTACHMENT,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to color attachment, Graphics fragment read from sampled image",
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics write to color attachment, Graphics vertex read from sampled image",
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       THSVS_ACCESS_VERTEX_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics fragment read from sampled image, Graphics write to color attachment",
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       0,
                       0,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);

    global_barrier_test("None, Transfer read from buffer",
                        THSVS_ACCESS_NONE,
                        THSVS_ACCESS_TRANSFER_READ,
                        VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                        VK_PIPELINE_STAGE_TRANSFER_BIT,
                        0,
                        0);

    global_barrier_test("Transfer write to buffer, Graphics read from vertex buffer",
                        THSVS_ACCESS_TRANSFER_WRITE,
                        THSVS_ACCESS_VERTEX_BUFFER,
                        VK_PIPELINE_STAGE_TRANSFER_BIT,
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                        VK_ACCESS_TRANSFER_WRITE_BIT,
                        VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);

    image_barrier_test("Transfer write to image, Graphics fragment read from sampled image",
                       THSVS_ACCESS_TRANSFER_WRITE,
                       THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER,
                       VK_PIPELINE_STAGE_TRANSFER_BIT,
                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                       VK_ACCESS_TRANSFER_WRITE_BIT,
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

    image_barrier_test("Graphics color attachment write, Presentation",
                       THSVS_ACCESS_COLOR_ATTACHMENT_WRITE,
                       THSVS_ACCESS_PRESENT,
                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       0,
                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                       VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);

    global_barrier_test("Full pipeline barrier",
                        THSVS_ACCESS_GENERAL,
                        THSVS_ACCESS_GENERAL,
                        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                        VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
                        VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT);
}
(File diff suppressed because it is too large.)
@@ -71,6 +71,21 @@ void Init(void) {
                               .Finalize = PassFinalize};
     rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass0", sizeof("pass0") - 1), &bind);
     rtBindRenderPass(_framegraph, rtCalculateRenderPassID("pass1", sizeof("pass1") - 1), &bind);
+
+    rt_v2 vertices[] = {
+        { 0,    0.5},
+        { 0.5, -0.5},
+        {-0.5, -0.5}
+    };
+    rt_buffer_info info = {
+        .type  = RT_BUFFER_TYPE_VERTEX,
+        .usage = RT_BUFFER_USAGE_STATIC,
+        .size  = sizeof(vertices),
+        .data  = vertices,
+    };
+    rt_buffer_handle buf;
+    g_renderer.CreateBuffers(1, &info, &buf);
+
 #endif
 }

@@ -25,7 +25,7 @@ typedef struct {
     unsigned int width;
     unsigned int height;
     unsigned int sample_count;
-    rt_gpu_semaphore_handle semaphore;
+    rt_gpu_semaphore_handle semaphores[3];
     rt_render_target_handle api_render_target;
 } rt_render_target;

@@ -309,6 +309,8 @@ static bool
 CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
     bool result = false;
 
+    unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
+
     /* TODO(Kevin): determine aliasing opportunities */
     const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
     for (uint32_t i = 0; i < info->render_target_count; ++i) {

@@ -339,13 +341,15 @@ CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_are
         }
     }
 
+    for (unsigned int j = 0; j < max_frames_in_flight; ++j) {
         char sem_name[128];
-        rtSPrint(sem_name, 128, "%s - Semaphore", (name) ? name : "Unnamed RT");
+        rtSPrint(sem_name, 128, "%s - Semaphore (%u)", (name) ? name : "Unnamed RT", j);
         rt_gpu_semaphore_info sem_info = {
             .initial_value = 0,
             .name = sem_name,
         };
-        g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphore);
+        g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphores[j]);
+    }
 
     if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
         graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||

@@ -639,6 +643,8 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
 
     rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore();
     rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore();
+    unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
+    unsigned int frame_index = frame_id % max_frames_in_flight;
 
     rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
     if (!temp.arena) {

@@ -819,6 +825,10 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
         if (is_graphics_pass) {
             g_renderer.CmdEndPass(cmdbuf);
         }
+        for (uint32_t j = 0; j < write_count; j++) {
+            rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target);
+            g_renderer.CmdFlushRenderTargetWrite(cmdbuf, rt->api_render_target);
+        }
 
         rt_gpu_semaphore_handle *wait_semaphores = NULL, *signal_semaphores = NULL;
         uint64_t *wait_values = NULL, *signal_values = NULL;

@@ -847,18 +857,18 @@ RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int fr
 
         for (uint32_t j = 0; j < read_count; ++j) {
             rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target);
-            wait_semaphores[*wait_count] = rt->semaphore;
+            wait_semaphores[*wait_count] = rt->semaphores[frame_index];
             wait_values[*wait_count] = signal_value_base + execution_level;
             *wait_count += 1;
         }
         for (uint32_t j = 0; j < write_count; ++j) {
             rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target);
-            signal_semaphores[*signal_count] = rt->semaphore;
+            signal_semaphores[*signal_count] = rt->semaphores[frame_index];
             signal_values[*signal_count] = signal_value_base + execution_level + 1;
             *signal_count += 1;
 
             if (signal_value_base >= 200) {
-                wait_semaphores[*wait_count] = rt->semaphore;
+                wait_semaphores[*wait_count] = rt->semaphores[frame_index];
                 wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1;
                 *wait_count += 1;
             }
@@ -23,12 +23,15 @@ static rt_dynlib _renderer_lib;
 static bool _renderer_loaded = false;
 
 RT_DLLEXPORT
-RT_CVAR_S(rt_Renderer, "Select the render backend. Available options: [vk, null], Default: vk", "vk");
+RT_CVAR_S(rt_Renderer,
+          "Select the render backend. Available options: [vk, null], Default: vk",
+          "vk");
 
 #ifdef RT_STATIC_LIB
 extern void RT_RENDERER_API_FN(RegisterCVars)(void);
 extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
 extern void RT_RENDERER_API_FN(Shutdown)(void);
+extern unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void);
 extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
 extern void RT_RENDERER_API_FN(EndFrame)(unsigned int);
 extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);

@@ -49,12 +52,17 @@ extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semapho
 extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle);
 extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void);
 extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void);
+extern rt_result
+RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
+extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
 extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
                                              const rt_cmd_begin_pass_info *);
 extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
 extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle,
                                                           rt_render_target_handle,
                                                           rt_render_target_state);
+extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle,
+                                                          rt_render_target_handle);
 #endif
 
 extern rt_result InitFramegraphManager(void);

@@ -84,6 +92,7 @@ static bool LoadRenderer(void) {
         RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
         RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
         RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
+        RETRIEVE_SYMBOL(GetMaxFramesInFlight, rt_get_max_frames_in_flight_fn);
         RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
         RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn);
         RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);

@@ -98,9 +107,12 @@ static bool LoadRenderer(void) {
         RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn);
         RETRIEVE_SYMBOL(GetSwapchainAvailableSemaphore, rt_get_swapchain_available_semaphore_fn);
         RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn);
+        RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
+        RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
         RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
         RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
         RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
+        RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn);
     } else {
         rtReportError("GFX",
                       "Unsupported renderer backend: (%s) %s",

@@ -113,6 +125,7 @@ static bool LoadRenderer(void) {
     g_renderer.RegisterCVars = &rtRenRegisterCVars;
     g_renderer.Init = &rtRenInit;
     g_renderer.Shutdown = &rtRenShutdown;
+    g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight;
     g_renderer.BeginFrame = &rtRenBeginFrame;
     g_renderer.EndFrame = &rtRenEndFrame;
     g_renderer.CompilePipeline = &rtRenCompilePipeline;

@@ -127,9 +140,12 @@ static bool LoadRenderer(void) {
     g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue;
     g_renderer.GetSwapchainAvailableSemaphore = &rtRenGetSwapchainAvailableSemaphore;
     g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore;
+    g_renderer.CreateBuffers = &rtRenCreateBuffers;
+    g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
     g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
     g_renderer.CmdEndPass = &rtRenCmdEndPass;
     g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
+    g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite;
 #endif
     return true;
 }
@@ -210,6 +210,7 @@ typedef struct rt_pipeline_info_s rt_pipeline_info;
 typedef void rt_register_renderer_cvars_fn(void);
 typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
 typedef void rt_shutdown_renderer_fn(void);
+typedef unsigned int rt_get_max_frames_in_flight_fn(void);
 typedef void rt_begin_frame_fn(unsigned int frame_id);
 typedef void rt_end_frame_fn(unsigned int frame_id);
 typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);

@@ -239,11 +240,14 @@ typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
 typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf,
                                                 rt_render_target_handle render_target,
                                                 rt_render_target_state new_state);
+typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf,
+                                                 rt_render_target_handle render_target);
 
 typedef struct {
     rt_register_renderer_cvars_fn *RegisterCVars;
     rt_init_renderer_fn *Init;
     rt_shutdown_renderer_fn *Shutdown;
+    rt_get_max_frames_in_flight_fn *GetMaxFramesInFlight;
     rt_begin_frame_fn *BeginFrame;
     rt_end_frame_fn *EndFrame;
     rt_compile_pipeline_fn *CompilePipeline;

@@ -265,6 +269,7 @@ typedef struct {
     rt_cmd_begin_pass_fn *CmdBeginPass;
     rt_cmd_end_pass_fn *CmdEndPass;
     rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget;
+    rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite;
 } rt_renderer_api;
 
 #define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name
@@ -30,6 +30,10 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
 void RT_RENDERER_API_FN(Shutdown)(void) {
 }
 
+unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
+    return 2;
+}
+
 void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
     RT_UNUSED(frame_id);
 }

@@ -132,4 +136,10 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
     RT_UNUSED(cmd);
     RT_UNUSED(target);
     RT_UNUSED(state);
 }
+
+void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
+                                                   rt_render_target_handle render_target) {
+    RT_UNUSED(cmdbuf_handle);
+    RT_UNUSED(render_target);
+}
@@ -1,5 +1,10 @@
+#include "command_buffers.h"
 #include "gpu.h"
+#include "transfers.h"
+#include "resources.h"
 
 #include "gfx/renderer_api.h"
 
 #include "runtime/config.h"
+#include "runtime/threading.h"
 
@@ -9,14 +14,8 @@
 RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);
 
 typedef struct rt_buffer_data_s {
-    VkBuffer buffer;
-    VmaAllocation allocation;
-    size_t size;
-    rt_buffer_usage usage;
-    rt_buffer_type type;
-
-    rt_rwlock lock;
-
+    rt_buffer data;
     uint32_t version;
     struct rt_buffer_data_s *next_free;
 } rt_buffer_data;

@@ -26,18 +25,22 @@ static rt_mutex *_list_lock;
 
 rt_result InitBufferManagement(void) {
     size_t n = (size_t)rt_VkMaxBufferCount.i;
-    _buffers = malloc(sizeof(rt_buffer_data) * n);
+    _buffers = calloc(n, sizeof(rt_buffer_data));
     if (!_buffers)
         return RT_OUT_OF_MEMORY;
     _first_free = &_buffers[1];
     for (size_t i = 1; i < n - 1; ++i)
         _buffers[i].next_free = &_buffers[i + 1];
     _list_lock = rtCreateMutex();
     return RT_SUCCESS;
 }
 
 void ShutdownBufferManagement(void) {
     for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
-        if (_buffers[i].buffer == VK_NULL_HANDLE)
+        if (_buffers[i].data.buffer == VK_NULL_HANDLE)
             continue;
-        vmaDestroyBuffer(g_gpu.allocator, _buffers[i].buffer, _buffers[i].allocation);
-        rtDestroyRWLock(&_buffers[i].lock);
+        vmaDestroyBuffer(g_gpu.allocator, _buffers[i].data.buffer, _buffers[i].data.allocation);
+        rtDestroyRWLock(&_buffers[i].data.lock);
         memset(&_buffers[i], 0, sizeof(_buffers[i]));
     }
     free(_buffers);

@@ -47,6 +50,33 @@ void ShutdownBufferManagement(void) {
 
 void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);
 
+static void UploadViaMap(rt_buffer_data *buffer, const void *data, size_t size) {
+    rtLockWrite(&buffer->data.lock);
+    void *dev_mem = NULL;
+    if (vmaMapMemory(g_gpu.allocator, buffer->data.allocation, &dev_mem) != VK_SUCCESS) {
+        rtReportError("vk", "Unable to map buffer for upload");
+        rtUnlockWrite(&buffer->data.lock);
+        return;
+    }
+    memcpy(dev_mem, data, size);
+    vmaUnmapMemory(g_gpu.allocator, buffer->data.allocation);
+    if (!buffer->data.coherent)
+        vmaFlushAllocation(g_gpu.allocator, buffer->data.allocation, 0, VK_WHOLE_SIZE);
+    rtUnlockWrite(&buffer->data.lock);
+}
+
+/* Convenience function that decides between mapping or uploading via transfer buffer */
+static void UploadData(rt_buffer_data *buffer, const void *data, size_t size) {
+    if (buffer->data.mappable)
+        UploadViaMap(buffer, data, size);
+    else
+        rtUploadToBuffer(buffer->data.buffer,
+                         buffer->data.allocation,
+                         buffer->data.owner,
+                         data,
+                         size);
+}
+
 rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
                                             const rt_buffer_info *info,
                                             rt_buffer_handle *p_buffers) {
@@ -59,11 +89,131 @@ rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
             rtRenDestroyBuffers(i, p_buffers);
             return RT_OUT_OF_MEMORY;
         }
         _first_free = slot->next_free;
         rtUnlockMutex(_list_lock);
 
+        VkBufferUsageFlags buffer_usage = 0;
+        switch (info->type) {
+        case RT_BUFFER_TYPE_VERTEX:
+            buffer_usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+            break;
+        case RT_BUFFER_TYPE_INDEX:
+            buffer_usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+            break;
+        case RT_BUFFER_TYPE_STORAGE:
+            buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+            break;
+        case RT_BUFFER_TYPE_UNIFORM:
+            buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+            break;
+        }
+        buffer_usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+        VkBufferCreateInfo buffer_info = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+            .size = info->size,
+            .usage = buffer_usage,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        };
+
+        VmaMemoryUsage alloc_usage = 0;
+        VmaAllocationCreateFlags alloc_flags = 0;
+        switch (info->usage) {
+        case RT_BUFFER_USAGE_STATIC:
+            alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+            alloc_flags = 0;
+            break;
+        case RT_BUFFER_USAGE_DYNAMIC:
+            alloc_usage = VMA_MEMORY_USAGE_AUTO;
+            alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+                          VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+            break;
+        case RT_BUFFER_USAGE_TRANSIENT:
+            alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+            alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+            break;
+        }
+        VmaAllocationCreateInfo alloc_info = {.usage = alloc_usage, .flags = alloc_flags};
+
+        VkResult res = vmaCreateBuffer(g_gpu.allocator,
+                                       &buffer_info,
+                                       &alloc_info,
+                                       &slot->data.buffer,
+                                       &slot->data.allocation,
+                                       NULL);
+        if (res != VK_SUCCESS) {
+            rtReportError("vk", "Failed to create a buffer: %u", res);
+            rtLockMutex(_list_lock);
+            slot->next_free = _first_free;
+            _first_free = slot;
+            rtUnlockMutex(_list_lock);
+            if (i > 0)
+                rtRenDestroyBuffers(i, p_buffers);
+            return RT_UNKNOWN_ERROR;
+        }
+
+        rt_create_rwlock_result lock_res = rtCreateRWLock();
+        if (!lock_res.ok) {
+            rtReportError("vk", "Failed to create lock for buffer.");
+            vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
+            rtLockMutex(_list_lock);
+            slot->next_free = _first_free;
+            _first_free = slot;
+            rtUnlockMutex(_list_lock);
+            if (i > 0)
+                rtRenDestroyBuffers(i, p_buffers);
+            return RT_UNKNOWN_ERROR;
+        }
+
+        VkMemoryPropertyFlags properties;
+        vmaGetAllocationMemoryProperties(g_gpu.allocator, slot->data.allocation, &properties);
+        slot->data.mappable = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
+        slot->data.coherent = (properties & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
+
+        slot->data.owner = RT_VK_UNOWNED;
+        slot->data.state = RT_BUFFER_STATE_NOT_USED;
+
+        if (info->data)
+            UploadData(slot, info->data, info->size);
+
+        ptrdiff_t index = slot - _buffers;
+        p_buffers[i].index = (uint32_t)index;
+        p_buffers[i].version = slot->version;
     }
     return RT_SUCCESS;
 }
 
+void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
+    for (uint32_t i = 0; i < count; ++i) {
+        if (buffers[i].index >= (uint32_t)rt_VkMaxBufferCount.i)
+            continue;
+        rt_buffer_data *slot = &_buffers[buffers[i].index];
+        if (slot->version != buffers[i].version) {
+            rtLog("vk", "Tried to destroy a buffer with an invalid handle (version mismatch).");
+            continue;
+        }
+        rtLockWrite(&slot->data.lock);
+        slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
+        vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
+        slot->data.buffer = VK_NULL_HANDLE;
+        slot->data.allocation = VK_NULL_HANDLE;
+        rtUnlockWrite(&slot->data.lock);
+        rtDestroyRWLock(&slot->data.lock);
+
+        rtLockMutex(_list_lock);
+        slot->next_free = _first_free;
+        _first_free = slot;
+        rtUnlockMutex(_list_lock);
+    }
+}
+
+rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
+    if (handle.index >= (uint32_t)rt_VkMaxBufferCount.i)
+        return NULL;
+    rt_buffer_data *slot = &_buffers[handle.index];
+    if (slot->version != handle.version) {
+        rtLog("vk", "Tried to access a buffer with an invalid handle (version mismatch).");
+        return NULL;
+    }
+    return &slot->data;
+}
@@ -330,8 +330,11 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
         rtLog("vk", " - TODO: More Info");
         rtLog("vk", "Wait Semaphores:");
         for (uint32_t i = 0; i < wait_count; ++i) {
-            rtLog("vk", " - %u:%u Value %u", info->wait_semaphores[i].version,
-                  info->wait_semaphores[i].index, info->wait_values[i]);
+            rtLog("vk",
+                  " - %u:%u Value %u",
+                  info->wait_semaphores[i].version,
+                  info->wait_semaphores[i].index,
+                  info->wait_values[i]);
         }
         rtLog("vk", "Signal Semaphores:");
         for (uint32_t i = 0; i < signal_count; ++i) {

@@ -417,7 +420,8 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
                                       const VkSemaphore *signal_semaphores,
                                       const uint32_t *signal_values,
                                       uint32_t signal_semaphore_count,
-                                      rt_gpu_queue queue) {
+                                      rt_gpu_queue queue,
+                                      VkFence fence) {
 
     rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
     if (!temp.arena)

@@ -476,7 +480,7 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
         .pCommandBufferInfos = &command_buffer_info,
     };
 
-    if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
+    if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) {
         rtLog("vk", "vkQueueSubmit failed.");
         result = RT_UNKNOWN_ERROR;
     }

@@ -19,6 +19,7 @@ rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
                                       const VkSemaphore *signal_semaphores,
                                       const uint32_t *signal_values,
                                       uint32_t signal_semaphore_count,
-                                      rt_gpu_queue queue);
+                                      rt_gpu_queue queue,
+                                      VkFence fence);
 
 #endif
@@ -9,6 +9,13 @@
 
 #include <string.h>
 
+#define USE_SIMPLE_SYNC_LIB 0
+
+#if USE_SIMPLE_SYNC_LIB
+#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
+#include <stdbool.h>
+#endif
+
 /* Retrieve the VkCommandBuffer as varname, or return */
 #define GET_CMDBUF(varname, handle)                                     \
     VkCommandBuffer varname = rtGetCommandBuffer((handle));             \

@@ -196,8 +203,8 @@ static void ExecuteRenderTargetBarrier(rt_render_target *rt,
     /* Determine access flags */
     VkPipelineStageFlags2 src_stage = 0;
     VkPipelineStageFlags2 dst_stage = 0;
-    VkAccessFlags2 src_access = 0;
-    VkAccessFlags2 dst_access = 0;
+    VkAccessFlags2 src_access = 0;
+    VkAccessFlags2 dst_access = 0;
     if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
         src_access =
             (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)

@@ -265,7 +272,7 @@ static void DoLayoutTransition(rt_render_target *rt,
                                uint32_t image_index,
                                rt_render_target_state new_state,
                                VkCommandBuffer cmdbuf) {
-
+#if !USE_SIMPLE_SYNC_LIB
     /* Determine old and new layout */
     VkImageLayout old_layout;
     switch (rt->states[image_index]) {

@@ -274,7 +281,7 @@ static void DoLayoutTransition(rt_render_target *rt,
         break;
     case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
     case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
-        old_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
+        old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
         break;
     default:
         old_layout = VK_IMAGE_LAYOUT_UNDEFINED;

@@ -287,7 +294,7 @@ static void DoLayoutTransition(rt_render_target *rt,
         break;
     case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
     case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
-        new_layout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL;
+        new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
         break;
     default:
         new_layout = VK_IMAGE_LAYOUT_UNDEFINED;

@@ -306,7 +313,7 @@ static void DoLayoutTransition(rt_render_target *rt,
         (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
             ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
             : VK_IMAGE_ASPECT_COLOR_BIT;
 
     VkPipelineStageFlags2 src_stage = 0;
     VkPipelineStageFlags2 dst_stage = 0;
     /* Determine access flags */
@@ -323,23 +330,26 @@ static void DoLayoutTransition(rt_render_target *rt,
                   VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
             : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
     } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
-        src_access = VK_ACCESS_2_SHADER_WRITE_BIT;
-        src_stage = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
+        src_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
+        src_stage =
+            VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; // VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
+                                                    // VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
     }
 
     if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
-        dst_access =
-            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
-                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
-                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
-        dst_stage =
-            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+        dst_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                      VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
+        dst_stage = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                         ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                               VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                         : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
-    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
-        dst_access = VK_ACCESS_2_SHADER_READ_BIT;
-        dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
+        dst_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
+        dst_stage = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
     }
 
     VkImageMemoryBarrier2 image_barrier = {
@@ -372,7 +382,64 @@ static void DoLayoutTransition(rt_render_target *rt,
 #ifdef RT_DEBUG
     vkCmdEndDebugUtilsLabelEXT(cmdbuf);
 #endif
+#else
+    ThsvsAccessType prev_access;
+    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+            prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
+        else
+            prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) {
+        prev_access = THSVS_ACCESS_NONE;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
+        prev_access = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
+        prev_access = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
+    }
+
+    ThsvsAccessType next_accesses[2];
+    uint32_t next_access_count = 0;
+    if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
+            next_accesses[0] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ;
+            next_accesses[1] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
+        } else {
+            next_accesses[0] = THSVS_ACCESS_COLOR_ATTACHMENT_READ;
+            next_accesses[1] = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
+        }
+        next_access_count = 2;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) {
+        next_accesses[0] = THSVS_ACCESS_NONE;
+        next_access_count = 1;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
+        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
+        next_access_count = 1;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
+        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
+        next_accesses[1] = THSVS_ACCESS_ANY_SHADER_WRITE;
+        next_access_count = 2;
+    }
+    VkImageAspectFlags aspect_mask =
+        (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+            ? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
+            : VK_IMAGE_ASPECT_COLOR_BIT;
+    ThsvsImageBarrier barrier = {0};
+    barrier.image = rt->image[image_index];
+    barrier.pPrevAccesses = &prev_access;
+    barrier.prevAccessCount = 1;
+    barrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
+    barrier.nextAccessCount = next_access_count;
+    barrier.pNextAccesses = next_accesses;
+    barrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
+    barrier.discardContents = false;
+    barrier.subresourceRange.aspectMask = aspect_mask;
+    barrier.subresourceRange.baseArrayLayer = 0;
+    barrier.subresourceRange.layerCount = 1;
+    barrier.subresourceRange.baseMipLevel = 0;
+    barrier.subresourceRange.levelCount = 1;
+    thsvsCmdPipelineBarrier(cmdbuf, NULL, 0, NULL, 1, &barrier);
+
+#endif
     rt->states[image_index] = new_state;
 }
@@ -396,3 +463,48 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
     else
         ExecuteRenderTargetBarrier(rt, image_index, cmdbuf);
 }
+
+void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
+                                                   rt_render_target_handle render_target) {
+    GET_CMDBUF(cmdbuf, cmdbuf_handle)
+    uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
+    if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
+        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
+    }
+    rt_render_target *rt = rtGetRenderTarget(render_target);
+    if (!rt) {
+        rtLog("vk", "Tried to flush invalid render target");
+        return;
+    }
+
+    VkAccessFlags2 src_access;
+    VkPipelineStageFlags2 src_stage;
+    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
+        src_access =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
+                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
+        src_stage =
+            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
+                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
+                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
+                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
+    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
+        src_access = VK_ACCESS_2_MEMORY_WRITE_BIT;
+        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+    } else {
+        return;
+    }
+
+    VkMemoryBarrier2 barrier = {.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
+                                .srcAccessMask = src_access,
+                                .srcStageMask = src_stage,
+                                .dstAccessMask = 0,
+                                .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT};
+    VkDependencyInfo dep = {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .memoryBarrierCount = 1,
+        .pMemoryBarriers = &barrier,
+    };
+    vkCmdPipelineBarrier2(cmdbuf, &dep);
+}
@ -2,6 +2,7 @@
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "transfers.h"

#include "gfx/renderer_api.h"

@ -114,7 +115,8 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
                                 &frame->swapchain_transitioned,
                                 NULL,
                                 1,
-                                RT_GRAPHICS_QUEUE) != RT_SUCCESS) {
+                                RT_GRAPHICS_QUEUE,
+                                VK_NULL_HANDLE) != RT_SUCCESS) {
        rtReportError("vk", "Failed to submit the layout transition for the swapchain image.");
        return;
    }
@ -132,4 +134,6 @@ void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
    if (res != VK_SUCCESS) {
        rtReportError("vk", "vkQueuePresentKHR failed: %u", res);
    }

    rtFlushGPUTransfers();
}
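rtSubmitSingleCommandBuffer now takes a fence as its final parameter: EndFrame passes VK_NULL_HANDLE because the swapchain transition is already synchronized by semaphores, while the transfer path in transfers.c (below) passes a per-transfer fence that rtFlushGPUTransfers waits on.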
@ -9,6 +9,9 @@

#include "gfx/renderer_api.h"

/* Used to mark a resource as not owned by a particular queue */
#define RT_VK_UNOWNED 255

/* Minimum supported value of g_gpu.max_frames_in_flight */
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2

@ -97,6 +100,8 @@ VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);

VkQueue rtGetQueue(rt_gpu_queue queue);

uint32_t rtGetQueueFamily(rt_gpu_queue queue);

const char *rtVkFormatToString(VkFormat format);

#endif
@ -55,6 +55,20 @@ VkQueue rtGetQueue(rt_gpu_queue queue) {
    }
}

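/* Note (added for clarity): UINT32_MAX is the same value as
 * VK_QUEUE_FAMILY_IGNORED (~0U), so feeding the result of this function into a
 * barrier's queue family indices degrades to "no ownership transfer" rather
 * than an invalid family index. */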
uint32_t rtGetQueueFamily(rt_gpu_queue queue) {
    switch (queue) {
    case RT_GRAPHICS_QUEUE:
        return g_gpu.graphics_family;
    case RT_COMPUTE_QUEUE:
        return g_gpu.compute_family;
    case RT_TRANSFER_QUEUE:
        return g_gpu.transfer_family;
    default:
        return UINT32_MAX;
    }
}


const char *rtVkFormatToString(VkFormat format) {
    switch (format) {
    case VK_FORMAT_R8G8B8A8_UNORM:
@ -648,6 +648,8 @@ extern rt_result InitializeSempahoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
extern rt_result InitBufferManagement(void);
extern void ShutdownBufferManagement(void);
extern rt_result InitializeTransfers(void);
extern void ShutdownTransfers(void);

rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
    rtLog("vk", "Init");
@ -697,6 +699,9 @@ rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
    if (res != RT_SUCCESS)
        return res;
    res = InitBufferManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitializeTransfers();
    if (res != RT_SUCCESS)
        return res;
    res = rtCreateSwapchain();
@ -711,6 +716,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
    rtLog("vk", "Shutdown");
    vkDeviceWaitIdle(g_gpu.device);
    rtDestroySwapchain();
    ShutdownTransfers();
    ShutdownBufferManagement();
    ShutdownCommandBufferManagement();
    ShutdownSemaphoreManagement();
@ -725,3 +731,7 @@ void RT_RENDERER_API_FN(Shutdown)(void) {
#endif
    vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb);
}

unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
    return g_gpu.max_frames_in_flight;
}
@ -15,6 +15,7 @@ if vk_dep.found()
    'pipelines.h',
    'render_targets.h',
    'swapchain.h',
    'transfers.h',

    'buffers.c',
    'command_buffers.c',
@ -26,6 +27,9 @@ if vk_dep.found()
    'pipelines.c',
    'render_targets.c',
    'swapchain.c',
    'transfers.c',

    'simple_sync_impl.cpp',

    # Contrib Sources
    '../../../contrib/volk/volk.h',
80
src/renderer/vk/resources.h
Normal file
@ -0,0 +1,80 @@
#ifndef RT_VK_RESOURCES_H
#define RT_VK_RESOURCES_H

/* Buffers and images */

#include "gpu.h"

#include "runtime/threading.h"

typedef enum {
    RT_BUFFER_STATE_INVALID,

    RT_BUFFER_STATE_NOT_USED,

    RT_BUFFER_STATE_IN_USE,

    RT_BUFFER_STATE_IN_TRANSFER,
} rt_buffer_state;

typedef struct {
    VkBuffer buffer;
    VmaAllocation allocation;
    size_t size;
    rt_buffer_usage usage;
    rt_buffer_type type;
    rt_buffer_state state;
    rt_rwlock lock;

    bool mappable;
    bool coherent;

    rt_gpu_queue owner;
} rt_buffer;


rt_buffer *rtGetBuffer(rt_buffer_handle handle);

/* Helper functions for accessing buffers */

RT_INLINE rt_gpu_queue rtGetBufferOwner(rt_buffer_handle handle) {
    rt_buffer *buffer = rtGetBuffer(handle);
    rt_gpu_queue owner = RT_VK_UNOWNED;
    if (buffer) {
        rtLockRead(&buffer->lock);
        owner = buffer->owner;
        rtUnlockRead(&buffer->lock);
    }
    return owner;
}

RT_INLINE void rtSetBufferOwner(rt_buffer_handle handle, rt_gpu_queue owner) {
    rt_buffer *buffer = rtGetBuffer(handle);
    if (buffer) {
        rtLockWrite(&buffer->lock);
        buffer->owner = owner;
        rtUnlockWrite(&buffer->lock);
    }
}

RT_INLINE rt_buffer_state rtGetBufferState(rt_buffer_handle handle) {
    rt_buffer *buffer = rtGetBuffer(handle);
    rt_buffer_state state = RT_BUFFER_STATE_INVALID;
    if (buffer) {
        rtLockRead(&buffer->lock);
        state = buffer->state;
        rtUnlockRead(&buffer->lock);
    }
    return state;
}

RT_INLINE void rtSetBufferState(rt_buffer_handle handle, rt_buffer_state state) {
    rt_buffer *buffer = rtGetBuffer(handle);
    if (buffer) {
        rtLockWrite(&buffer->lock);
        buffer->state = state;
        rtUnlockWrite(&buffer->lock);
    }
}

#endif
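For illustration, a hypothetical call site (not part of this commit) that uses the helpers to bracket an upload:

    if (rtGetBufferState(vbo_handle) == RT_BUFFER_STATE_NOT_USED) {
        rtSetBufferState(vbo_handle, RT_BUFFER_STATE_IN_TRANSFER);
        rt_gpu_queue owner = rtGetBufferOwner(vbo_handle);
        /* record the upload, releasing ownership from `owner` if needed */
        rtSetBufferOwner(vbo_handle, RT_TRANSFER_QUEUE);
    }

Note that each helper takes the rwlock separately, so a check-then-set sequence like this is not atomic; callers need a higher-level protocol to avoid races.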
6
src/renderer/vk/simple_sync_impl.cpp
Normal file
@ -0,0 +1,6 @@
#include "gpu.h"

extern "C" {
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
}
263
src/renderer/vk/transfers.c
Normal file
@ -0,0 +1,263 @@
#include "transfers.h"
#include "command_buffers.h"

#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"

#include <stdbool.h>
#include <stdlib.h> /* calloc, free */
#include <string.h> /* memcpy */

RT_CVAR_I(rt_VkTransferSlotCount,
          "Number of available transfer slots per frame. Default: 512",
          512);

/* This is a temporary solution. We probably should keep a pool of buffers
 * to avoid re-creating the buffers all the time. */

typedef struct {
    VkBuffer buffer;
    VmaAllocation allocation;
    bool requires_flush;
} rt_transfer_buffer;

typedef struct {
    rt_transfer_buffer tbuf;
    VkFence fence;
    VkSemaphore ownership_transfer;
} rt_transfer;

static rt_transfer *_transfers;
static uint32_t _transfer_count;
static rt_mutex *_transfer_lock;

static rt_transfer_buffer AcquireTransferBuffer(size_t size) {
    rt_transfer_buffer tbuf = {VK_NULL_HANDLE};

    VkBufferCreateInfo buffer_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO,
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
    };

    if (vmaCreateBuffer(g_gpu.allocator,
                        &buffer_info,
                        &alloc_info,
                        &tbuf.buffer,
                        &tbuf.allocation,
                        NULL) == VK_SUCCESS) {
        VkMemoryPropertyFlags props;
        vmaGetAllocationMemoryProperties(g_gpu.allocator, tbuf.allocation, &props);
        tbuf.requires_flush = (props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0;
    }
    return tbuf;
}

static void ReturnTransferBuffer(rt_transfer_buffer buffer) {
    vmaDestroyBuffer(g_gpu.allocator, buffer.buffer, buffer.allocation);
}

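/* A minimal sketch of the pooling that the comment near the top of this file
 * alludes to (hypothetical, not part of this commit): keep returned buffers on
 * a free list keyed by capacity instead of destroying them immediately. */
typedef struct rt_pooled_tbuf {
    rt_transfer_buffer tbuf;
    size_t capacity;
    struct rt_pooled_tbuf *next;
} rt_pooled_tbuf;

static rt_pooled_tbuf *_tbuf_free_list; /* would need _transfer_lock protection */

static rt_transfer_buffer PoolAcquireTransferBuffer(size_t size) {
    for (rt_pooled_tbuf **it = &_tbuf_free_list; *it; it = &(*it)->next) {
        if ((*it)->capacity >= size) {
            rt_pooled_tbuf *hit = *it;
            *it = hit->next; /* unlink the first buffer that is large enough */
            rt_transfer_buffer tbuf = hit->tbuf;
            free(hit);
            return tbuf;
        }
    }
    return AcquireTransferBuffer(size); /* miss: fall back to a fresh buffer */
}

static void PoolReturnTransferBuffer(rt_transfer_buffer tbuf, size_t capacity) {
    rt_pooled_tbuf *node = malloc(sizeof(*node));
    if (!node) { /* the pool is an optimization; destroying is always safe */
        vmaDestroyBuffer(g_gpu.allocator, tbuf.buffer, tbuf.allocation);
        return;
    }
    node->tbuf = tbuf;
    node->capacity = capacity;
    node->next = _tbuf_free_list;
    _tbuf_free_list = node;
}
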
static void CopyToTransferBuffer(rt_transfer_buffer buffer, const void *data, size_t n) {
    void *tmem = NULL;
    vmaMapMemory(g_gpu.allocator, buffer.allocation, &tmem);
    RT_ASSERT(tmem, "Transfer Buffer memory must be mappable.");
    memcpy(tmem, data, n);
    vmaUnmapMemory(g_gpu.allocator, buffer.allocation);
    if (buffer.requires_flush)
        vmaFlushAllocation(g_gpu.allocator, buffer.allocation, 0, n);
}

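/* Note: vmaFlushAllocation rounds the offset and size to nonCoherentAtomSize
 * internally, so passing the raw byte count here should be fine. */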
rt_result InitializeTransfers(void) {
    _transfer_lock = rtCreateMutex();
    if (!_transfer_lock)
        return RT_UNKNOWN_ERROR;
    _transfers = calloc((size_t)rt_VkTransferSlotCount.i, sizeof(rt_transfer));
    if (!_transfers) {
        rtDestroyMutex(_transfer_lock);
        return RT_OUT_OF_MEMORY;
    }
    _transfer_count = 0;
    return RT_SUCCESS;
}

void ShutdownTransfers(void) {
    rtDestroyMutex(_transfer_lock);
    for (int i = 0; i < rt_VkTransferSlotCount.i; ++i) {
        if (_transfers[i].fence)
            vkDestroyFence(g_gpu.device, _transfers[i].fence, g_gpu.alloc_cb);
        /* The ownership transfer semaphores are created lazily below and were
         * previously leaked here. */
        if (_transfers[i].ownership_transfer)
            vkDestroySemaphore(g_gpu.device, _transfers[i].ownership_transfer, g_gpu.alloc_cb);
    }
    free(_transfers);
}

#define TRANSFER_FAILED -1
#define TRANSFER_NOT_NEEDED 0
#define TRANSFER_STARTED 1

static int AcquireBufferOwnership(rt_transfer *transfer,
                                  VkBuffer buffer,
                                  rt_gpu_queue current_owner,
                                  VkCommandBuffer transfer_cmd) {
    if (!transfer->ownership_transfer) {
        VkSemaphoreCreateInfo sem_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };
        if (vkCreateSemaphore(g_gpu.device,
                              &sem_info,
                              g_gpu.alloc_cb,
                              &transfer->ownership_transfer) != VK_SUCCESS) {
            rtReportError("vk", "Failed to create an ownership transfer semaphore.");
            return TRANSFER_FAILED;
        }
    }

    uint32_t src_family = rtGetQueueFamily(current_owner);
    uint32_t dst_family = rtGetQueueFamily(RT_TRANSFER_QUEUE);
    if (src_family == dst_family)
        return TRANSFER_NOT_NEEDED;

    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(current_owner);
    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    vkBeginCommandBuffer(cmd, &begin_info);
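    /* Release half of the queue family ownership transfer, recorded on the
     * current owner's queue. Per the spec, the destination access mask of a
     * release barrier has no practical effect, so only the source scope
     * matters here. */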
    VkBufferMemoryBarrier2 release_barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .buffer = buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
        .srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
        .srcAccessMask = 0,
        .srcQueueFamilyIndex = src_family,
        .dstQueueFamilyIndex = dst_family,
    };
    VkDependencyInfo dep = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                            .pBufferMemoryBarriers = &release_barrier,
                            .bufferMemoryBarrierCount = 1};
    vkCmdPipelineBarrier2(cmd, &dep);
    vkEndCommandBuffer(cmd);

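    /* Matching acquire half, recorded into the transfer queue's command
     * buffer. The source access mask of an acquire barrier is likewise
     * ignored. */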
    VkBufferMemoryBarrier2 acquire_barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .buffer = buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
        .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
        .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
        .srcQueueFamilyIndex = src_family,
        .dstQueueFamilyIndex = dst_family,
    };
    VkDependencyInfo dep2 = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                             .pBufferMemoryBarriers = &acquire_barrier,
                             .bufferMemoryBarrierCount = 1};
    vkCmdPipelineBarrier2(transfer_cmd, &dep2);

    /* Only transfer the ownership when the frame is finished */
    rt_frame_data *frame = rtGetFrameData(g_gpu.current_frame_id);
    VkSemaphore wait_semaphore = frame->render_finished;

    uint32_t dummy = 0;
    if (rtSubmitSingleCommandBuffer(cmd,
                                    &wait_semaphore,
                                    &dummy,
                                    1,
                                    &transfer->ownership_transfer,
                                    &dummy,
                                    1,
                                    current_owner,
                                    VK_NULL_HANDLE) != RT_SUCCESS)
        return TRANSFER_FAILED;
    return TRANSFER_STARTED;
}

rt_result rtUploadToBuffer(VkBuffer buffer,
                           VmaAllocation allocation,
                           rt_gpu_queue current_owner,
                           const void *data,
                           size_t nbytes) {
    rtLockMutex(_transfer_lock);
    rt_transfer *transfer =
        (int)_transfer_count < rt_VkTransferSlotCount.i ? &_transfers[_transfer_count++] : NULL;
    rtUnlockMutex(_transfer_lock);
    if (!transfer)
        return RT_NO_TRANSFER_SLOTS;

    transfer->tbuf = AcquireTransferBuffer(nbytes);
    if (!transfer->tbuf.buffer) {
        return RT_OUT_OF_MEMORY;
    }

    CopyToTransferBuffer(transfer->tbuf, data, nbytes);

    if (!transfer->fence) {
        VkFenceCreateInfo fence_info = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        if (vkCreateFence(g_gpu.device, &fence_info, g_gpu.alloc_cb, &transfer->fence) !=
            VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
    }

    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    vkBeginCommandBuffer(cmd, &begin_info);

    bool requires_ownership_transfer =
        (current_owner != RT_TRANSFER_QUEUE && current_owner != RT_VK_UNOWNED);
    if (requires_ownership_transfer) {
        int did_transfer = AcquireBufferOwnership(transfer, buffer, current_owner, cmd);
        if (did_transfer == TRANSFER_FAILED)
            return RT_UNKNOWN_ERROR;
        else if (did_transfer == TRANSFER_NOT_NEEDED)
            requires_ownership_transfer = false;
    }
    VkBufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = nbytes};
    vkCmdCopyBuffer(cmd, transfer->tbuf.buffer, buffer, 1, &region);
    vkEndCommandBuffer(cmd);

    uint32_t dummy = 0;
    return rtSubmitSingleCommandBuffer(cmd,
                                       requires_ownership_transfer ? &transfer->ownership_transfer
                                                                   : NULL,
                                       requires_ownership_transfer ? &dummy : NULL,
                                       requires_ownership_transfer ? 1 : 0,
                                       NULL,
                                       NULL,
                                       0,
                                       RT_TRANSFER_QUEUE,
                                       transfer->fence);
}

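/* Hypothetical call site (not part of this commit): streaming vertex data into
 * a device-local buffer currently owned by the graphics queue:
 *
 *     rt_result res = rtUploadToBuffer(vbo, vbo_alloc, RT_GRAPHICS_QUEUE,
 *                                      vertices, sizeof(vertices));
 *     if (res == RT_NO_TRANSFER_SLOTS)
 *         rtFlushGPUTransfers(); // drain this frame's slots, then retry
 */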
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void) {
    if (_transfer_count == 0)
        return;
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    RT_ASSERT(temp.arena, "Could not get a temporary arena for flushing gpu transfers.");
    rtLockMutex(_transfer_lock);
    VkFence *fences = RT_ARENA_PUSH_ARRAY(temp.arena, VkFence, _transfer_count);
    if (!fences) {
        rtReportError("vk", "Failed to allocate fences array for flushing gpu transfers.");
        rtUnlockMutex(_transfer_lock);
        return;
    }
    uint32_t count = 0;
    for (uint32_t i = 0; i < _transfer_count; ++i) {
        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
            continue;
        fences[count++] = _transfers[i].fence;
    }
    vkWaitForFences(g_gpu.device, count, fences, VK_TRUE, UINT64_MAX);
    /* Reset the fences here; the slots are reused next frame and a fence must
     * be unsignaled when it is handed to vkQueueSubmit again. */
    vkResetFences(g_gpu.device, count, fences);
    for (uint32_t i = 0; i < _transfer_count; ++i) {
        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
            continue;
        ReturnTransferBuffer(_transfers[i].tbuf);
    }
    _transfer_count = 0;
    rtUnlockMutex(_transfer_lock);
}
16
src/renderer/vk/transfers.h
Normal file
@ -0,0 +1,16 @@
#ifndef RT_VK_TRANSFERS_H
#define RT_VK_TRANSFERS_H

#include "gpu.h"
#include "runtime/runtime.h"

enum {
    RT_NO_TRANSFER_SLOTS = RT_CUSTOM_ERROR_START,
};

rt_result rtUploadToBuffer(VkBuffer buffer,
                           VmaAllocation allocation,
                           rt_gpu_queue current_owner,
                           const void *data,
                           size_t nbytes);

/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void);

#endif