Compare commits: temp/meshl...main (33 commits)

Commits (SHA1):
a6f3a04993, 33e596c9d6, 823b9ef9af, 033eac90bb, 96b244e4e3, a79fd56051, beba96b915, 4febd1b3fa,
faa84d3aa9, 8d303fff9f, 3b5f7d0029, 03f61b31ec, e25dde131a, 3a7bca385c, e0904e84c4, 3059fce861,
5709cc98a5, 62139b718a, b70cafecef, dd76b924c0, 25006139f1, ca5d8ad8f0, faa2b904ea, 578722a0c5,
77f0db7a7c, e371a24761, 4ab483c003, 50bd92dfcb, 88fd8a3059, 1a389c3759, f232a94f92, f98d64b927,
92fbe1ece4
@@ -16,7 +16,7 @@ on:
       - "**.hpp"
 
 jobs:
-  build:
+  cross_compile:
     name: Cross Compile with ming64
     runs-on: ${{ matrix.os }}
     strategy:
@@ -37,6 +37,7 @@ jobs:
           chmod +x ./scripts/download_dxc_cross.sh
           ./scripts/download_dxc_cross.sh
           meson setup --cross-file scripts/x86_64-w64-mingw32.txt -Db_sanitize=none build
+          cd build && meson devenv && cd ..
       - name: Compile
         run: meson compile -C build
       - name: Run Tests
@@ -47,3 +48,37 @@ jobs:
         with:
           name: ${{ matrix.os }}_Meson_Testlog
           path: build/meson-logs/testlog.txt
+  compile_linux:
+    name: Compile for linux
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ ubuntu-latest ]
+        meson_version: [ "1.4.0" ]
+    steps:
+      - name: Checkout Code
+        uses: https://gitea.com/ScMi1/checkout@v1
+      - name: Install APT Dependencies
+        run: |
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
+          wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          apt update -y
+          apt install -y python3-pip cmake clang build-essential libwayland-dev libxkbcommon-dev xorg-dev vulkan-sdk
+      - name: Install PIP Dependencies
+        run: python -m pip install meson==${{ matrix.meson_version }} ninja
+      - name: Configure Project
+        run: |
+          chmod +x ./scripts/download_dxc.sh
+          ./scripts/download_dxc.sh
+          meson setup -Dbuild_dx11=false -Dbuild_vk=true -Dstatic_renderer=vk -Denable_dx11_shader_compiler=false -Db_sanitize=none build
+          cd build && meson devenv && cd ..
+      - name: Compile
+        run: meson compile -C build
+      - name: Run Tests
+        run: meson test -C build/ -v
+      - name: Upload Test Log
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: ${{ matrix.os }}_Meson_Testlog
+          path: build/meson-logs/testlog.txt
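In short, the workflow now has two jobs: the existing MinGW cross-compile job (renamed from `build` to `cross_compile`, and now entering the Meson devenv after setup) and a new `compile_linux` job that installs the LunarG Vulkan SDK on Ubuntu and builds the project with the Vulkan renderer enabled.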
cfg/launcher.cfg — new file (+6 lines)

l_Renderer = vk
l_WindowTitle = rtengine
l_WindowWidth = 1024
l_WindowHeight = 768
l_WindowMode = 0
l_GameLib = (null)
@@ -1,179 +0,0 @@
Deleted: the vendored Vulkan Memory Allocator CHANGELOG.md (179 lines of release notes, from 1.0.0, 2017-06-16, through 3.0.1, 2022-05-26).
@@ -1,19 +0,0 @@
Deleted: the vendored Vulkan Memory Allocator LICENSE.txt (19 lines of MIT license text, "Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.").
@@ -1,175 +0,0 @@
Deleted: the vendored Vulkan Memory Allocator README.md (175 lines: project overview, feature list, prerequisites, a basic vmaCreateBuffer usage example, CMake build instructions and options, the precompiled-binaries note, and lists of software using the library and related projects).
File diff suppressed because it is too large
docs/CODE_STYLE.md — new file (+33 lines)

# Naming

## Functions

Functions (and methods) use `PascalCase`.
Functions (but not methods) exposed in headers additionally use the prefix `rt`: `rtLoadResource`.

## Types

Types (including C++ classes) use `snake_case`.
Types exposed in header files use the prefix `rt_`: `rt_aio_handle`.
This is often also done for "private" types in C/C++ files, but there it is not mandatory.

## Variables

Variables use `snake_case`.
Global variables use the prefix `g_`, while file-scope global variables use a single underscore as their prefix `_`.
Member variables of C++ classes (but not plain-old data C structs) use the prefix `m_`.

## Macros

Macros use `ALL_UPPER_CASE` with the `RT_` prefix.
This is also used for enum options.
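As an aside, a short sketch of the naming rules above in one place (all identifiers here are invented for illustration and are not part of the engine):

```cpp
// Illustrative only — these names do not exist in the codebase.
#define RT_MAX_FRAMES_IN_FLIGHT 2          // macro: ALL_UPPER_CASE with RT_ prefix

typedef struct rt_frame_stats {            // public type: snake_case with rt_ prefix
    unsigned frame_index;                  // plain C struct: no m_ prefix on members
    double   frame_time_ms;
} rt_frame_stats;

static int _frame_counter;                 // file-scope global: single underscore prefix
int g_log_verbosity;                       // global: g_ prefix

rt_frame_stats rtGetFrameStats(void);      // header-exposed function: PascalCase with rt prefix

class frame_graph {                        // C++ class: still snake_case
public:
    void AddPass(const char *name);        // method: PascalCase, no rt prefix
private:
    unsigned m_pass_count = 0;             // C++ member: m_ prefix
};
```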
## API and Interface Structs

API structs (like `rt_render_backend_api`) are collections of function pointers,
without any "object pointer" containing wrapped state.
They use the suffix `_api`.

Interface structs (like `rt_render_device_i`) also contain function pointers,
and additionally an "object pointer", usually called `o`, that points to some
stateful object. The function pointers in the interface usually take this object as their first parameter.
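A minimal sketch of that difference (the struct and member names other than `o` are invented for this example; the real `rt_render_backend_api` and `rt_render_device_i` declarations may look different, and `rt_result` is stood in for by `int`):

```cpp
#include <stddef.h>

typedef int rt_result; // stand-in for the engine's real result type

// API struct: a stateless collection of function pointers, suffix _api.
typedef struct rt_asset_loader_api {
    rt_result (*LoadMesh)(const char *path);
    void      (*UnloadMesh)(const char *path);
} rt_asset_loader_api;

// Interface struct: function pointers plus an object pointer "o" that holds
// per-instance state and is passed back as the first parameter. Suffix _i.
typedef struct rt_file_stream_i {
    void  *o;
    size_t (*Read)(void *o, void *buffer, size_t size);
    void   (*Close)(void *o);
} rt_file_stream_i;
```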
@@ -17,6 +17,7 @@ The type is one of:
 - **refactor**: Commit refactors code
 - **perf**: Commit optimizes performance
 - **doc**: Commit changes or adds documentation
+- **tweak**: Minor changes
 
 A scope may be appended to the type, for example:
 `feat(renderer): Implement the good lights`
docs/REQUIRED_TOOLS.md — new file (+12 lines)

# Tools required to build this

- [meson](https://mesonbuild.com/Getting-meson.html) The build system we use.
- [cmake](https://cmake.org/download/) For building some of the dependencies.
- [git](https://git-scm.com/) of course

On Windows:
- Visual Studio (2022 is what is frequently tested). Install the Game Development with C++ package for DirectX.

On Linux:
- GCC or clang.
- wget (for scripts)
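For reference, the CI workflow earlier in this compare drives these tools with `meson setup ... build` (optionally with `--cross-file scripts/x86_64-w64-mingw32.txt` for the MinGW cross build), `meson compile -C build`, and `meson test -C build/ -v`, after fetching DXC via the `scripts/download_dxc*.sh` helpers.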
meson.build

@@ -58,26 +58,33 @@ if build_machine.system() == 'linux' and host_machine.system() == 'windows'
   message('Adding /usr/share/mingw-w64/include to the project include path.')
   add_project_arguments(['-isystem/usr/share/mingw-w64/include',
                          '-DRT_CROSS_LINUX_WINDOWS',
-                         '-D_WIN32_WINNT=0x600'],
+                         '-D_WIN32_WINNT=0x600',
+                         '-lgcc',
+                         '-municode'],
                         language: ['c', 'cpp'],
                         native: false)
 endif
 
 fs = import('fs')
+cmake = import('cmake')
 
 # Gather dependencies
 thread_dep = dependency('threads')
 m_dep = compiler.find_library('m', required : false)
-vk_dep = dependency('vulkan', required : false)
 
 # Subprojects installed via wraps
-meshoptimizer_proj = subproject('meshoptimizer', default_options: ['warning_level=0', 'werror=false'] )
+meshoptimizer_opts = cmake.subproject_options()
+meshoptimizer_opts.add_cmake_defines({'CMAKE_POSITION_INDEPENDENT_CODE': true})
+meshoptimizer_proj = cmake.subproject('meshoptimizer', options: meshoptimizer_opts)
 meshoptimizer_dep = meshoptimizer_proj.get_variable('meshoptimizer_dep')
 
-windowing_dep = []
-if get_option('use_xlib')
-  windowing_dep = dependency('x11', required : true)
+glfw_proj = subproject('glfw', default_options: ['default_library=shared', 'warning_level=0', 'werror=false'])
+glfw_dep = glfw_proj.get_variable('glfw_dep')
+
+if host_machine.system() == 'linux' and get_option('use_xlib')
   add_project_arguments(['-DRT_USE_XLIB'], language : ['c', 'cpp'])
+elif host_machine.system() == 'linux' and get_option('use_wayland')
+  add_project_arguments(['-DRT_USE_WAYLAND'], language: ['c', 'cpp'])
 endif
 
 # Copy file utility
@@ -98,16 +105,16 @@ subdir('src')
 engine_link_libs = []
 if get_option('default_library') == 'static'
   if get_option('static_renderer') == 'vk'
-    engine_link_libs = [runtime_lib, app_lib, vk_renderer_lib]
+    engine_link_libs = [runtime_lib, vk_renderer_lib]
   elif get_option('static_renderer') == 'null'
-    engine_link_libs = [runtime_lib, app_lib, null_renderer_lib]
+    engine_link_libs = [runtime_lib, null_renderer_lib]
   elif get_option('static_renderer') == 'dx11'
-    engine_link_libs = [runtime_lib, app_lib, dx11_renderer_lib]
+    engine_link_libs = [runtime_lib, dx11_renderer_lib]
   else
     error('Invalid static_renderer option ', get_option('static_renderer'))
   endif
 else
-  engine_link_libs = [runtime_lib, gfx_lib, app_lib]
+  engine_link_libs = [runtime_lib]
 endif
 
 # Unit/Integration test driver
@@ -133,13 +140,3 @@ if get_option('game_as_subdir')
   endforeach
 endif
 
-# For Cross builds, we need libgcc from mingw
-if build_machine.system() == 'linux' and host_machine.system() == 'windows'
-  custom_target('copy libgcc',
-                input : '/usr/lib/gcc/x86_64-w64-mingw32/10-win32/libgcc_s_seh-1.dll',
-                output : 'libgcc_s_seh-1.dll',
-                command : [copy_util, '@INPUT@', '@OUTPUT@'],
-                build_by_default : true)
-endif
-
-
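Summarizing the build changes: the MinGW cross build now passes `-lgcc` and `-municode` instead of copying `libgcc_s_seh-1.dll` as a custom target, the unused `vulkan` dependency lookup is dropped, meshoptimizer is built through Meson's CMake module with position-independent code enabled, and GLFW replaces the direct X11 dependency, with Wayland selectable as an alternative to Xlib on Linux. The `app_lib`/`gfx_lib` libraries also disappear from `engine_link_libs`.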
@@ -1,8 +1,11 @@
 option('static_renderer', type : 'string', value : 'dx11', description : 'Name of the renderer used for static builds')
-option('use_xlib', type : 'boolean', value : false, description : 'Use Xlib for window creation under linux')
+option('use_xlib', type : 'boolean', value : true, description : 'Use Xlib for window creation under linux')
+option('use_wayland', type : 'boolean', value : false, description : 'Use wayland for window creation under linux')
 option('error_report_debugbreak', type : 'boolean', value : true, description : 'Debugbreak in ReportError')
 option('enable_dxc_shader_compiler', type : 'boolean', value : true, description : 'Enables building the dxc-based shader compiler.')
 option('enable_dx11_shader_compiler', type : 'boolean', value : true, description : 'Enables building the dx11-bases shader compiler.')
 option('game_as_subdir', type : 'boolean', value : false, description : 'If true, adds the directory "src/game" to the build.')
 option('build_dx11', type : 'boolean', value : true, description : 'Enables/disables the build of the dx11 renderer.')
+option('build_vk', type : 'boolean', value : false, description : 'Enables/disables the build of the vulkan renderer.')
 option('build_experiments', type : 'boolean', value : false, description : 'Enables/disables building the experiments in src/experimental.')
+option('launcher_name', type : 'string', value : 'launcher', description : 'Name of the launcher executable.', yield : true)
@@ -1,7 +1,7 @@
 #include "asset_compiler.h"
 #include "processor.h"
 
-#include "runtime/aio.h"
+#include "runtime/rt_aio.h"
 #include "runtime/buffer_manager.h"
 #include "runtime/config.h"
 #include "runtime/file_tab.h"
@@ -3,7 +3,7 @@
 
 #include "runtime/runtime.h"
 #include "runtime/mem_arena.h"
-#include "renderer/common/renderer_api.h"
+#include "renderer/renderer.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -7,6 +7,8 @@ struct mesh_data {
 
 struct meshlet_data {
     vec4 bounds; // xyz, radius
+    vec4 cone_apex; // xyz, unused
+    vec4 cone_axis_cutoff; // xyz, cutoff = w
     uint mesh_index;
     uint first_index;
     uint base_vertex;
@@ -42,10 +44,17 @@ void main() {
     meshlet_data meshlet = meshlets[meshlet_idx];
     mesh_data mesh = meshes[meshlet.mesh_index];
 
-    uint draw_idx = atomicAdd(draw_count, 1);
-    draw_commands[draw_idx].count = meshlet.index_count;
-    draw_commands[draw_idx].instance_count = 1;
-    draw_commands[draw_idx].first_index = meshlet.first_index;
-    draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
-    draw_commands[draw_idx].base_instance = 0;
+    vec3 cone_axis = (mesh.model * vec4(meshlet.cone_axis_cutoff.xyz, 0)).xyz;
+    vec3 cone_apex = (mesh.model * vec4(meshlet.cone_apex.xyz, 1)).xyz;
+    float cone_cutoff = meshlet.cone_axis_cutoff.w;
+
+    if (dot(normalize(cone_apex - camera_pos), cone_axis) < cone_cutoff) {
+        uint draw_idx = atomicAdd(draw_count, 1);
+        draw_commands[draw_idx].count = meshlet.index_count;
+        draw_commands[draw_idx].instance_count = 1;
+        draw_commands[draw_idx].first_index = meshlet.first_index;
+        draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
+        draw_commands[draw_idx].base_instance = 0;
+    }
 }
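The shader change above adds per-meshlet cone (backface) culling: a meshlet is only emitted as an indirect draw if the camera lies in front of its normal cone. For reference, here is a hedged CPU-side sketch of the same test; the per-meshlet cone data would typically come from meshoptimizer's `meshopt_computeMeshletBounds` after `meshopt_buildMeshlets` (which the generator already uses). The helper types and the function name are invented for this example, and it assumes the bounds are already in the same space as the camera position (the shader first transforms them by `mesh.model`):

```cpp
#include <meshoptimizer.h>
#include <cmath>

struct float3 { float x, y, z; };

static float3 Normalize3(float3 v) {
    float len = std::sqrt(v.x * v.x + v.y * v.y + v.z * v.z);
    return {v.x / len, v.y / len, v.z / len};
}

static float Dot3(float3 a, float3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Returns true if the meshlet can be skipped (all of its triangles face away
// from the camera). This mirrors the shader test, with the condition inverted:
// the shader draws when dot(...) < cone_cutoff.
bool ConeCullMeshlet(const meshopt_Bounds &bounds, float3 camera_pos) {
    float3 apex = {bounds.cone_apex[0], bounds.cone_apex[1], bounds.cone_apex[2]};
    float3 axis = {bounds.cone_axis[0], bounds.cone_axis[1], bounds.cone_axis[2]};
    float3 dir  = Normalize3({apex.x - camera_pos.x,
                              apex.y - camera_pos.y,
                              apex.z - camera_pos.z});
    return Dot3(dir, axis) >= bounds.cone_cutoff;
}
```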
@@ -1 +0,0 @@
-
@@ -1,18 +1,17 @@
 #include <glad/glad.h>
 #include <GLFW/glfw3.h>
 #include <runtime/runtime.h>
+#include <launcher/game_api.h>
 
 #include <imgui.h>
 #include <imgui_impl_glfw.h>
 #include <imgui_impl_opengl3.h>
 
+#include <new>
+
 #include "meshlet_generator.hpp"
 #include "meshlet_renderer.hpp"
 
-static void GlfwErrorCallback(int errnum, const char *desc) {
-    rtReportError("GLFW", "Error %d: %s", errnum, desc);
-}
-
 static void GLDebugCallback(GLenum source,
                             GLenum type,
                             GLuint id,
@@ -112,7 +111,7 @@ struct file_picker {
     }
 
     /* Returns true if a new file was selected */
-    bool Run() {
+    bool RunFlat() {
        bool opened=false;
        if (ImGui::Begin("File Selection", &m_open)) {
            ImGui::InputTextWithHint("Path",
@@ -125,7 +124,6 @@ struct file_picker {
                strcpy(m_input_buf[m_active_input], m_picked);
                opened = true;
            }
-           ImGui::Checkbox("Flat", &m_flat);
            ImGui::End();
        }
        return opened;
@@ -139,33 +137,24 @@ struct file_picker {
        return m_picked != nullptr;
     }
 
-    bool m_flat = true;
     const char *m_picked=nullptr;
     bool m_open = true;
     char m_input_buf[2][260];
     int m_active_input = 0;
 };
 
-int main() {
-    if (rtInitRuntime() != RT_SUCCESS)
-        return -1;
-    glfwSetErrorCallback(GlfwErrorCallback);
-    if (!glfwInit())
-        return -1;
+struct meshlets_exp {
+    file_picker picker;
+    meshlet_generator gen;
+    meshlet_renderer ren;
+};
 
+void MeshletsRegisterCVARs(rt_launcher_api *api) {}
+
+rt_result MeshletsInit(rt_launcher_api *api) {
     rtLog("EXP", "Meshlets experiment starting.");
 
-    glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_API);
-    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
-    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
-    glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GLFW_TRUE);
-    GLFWwindow *window = glfwCreateWindow(1280, 720, "MESHLETS!", NULL, NULL);
-    if (!window)
-        return -1;
-    glfwMakeContextCurrent(window);
     if (!gladLoadGL())
-        return -1;
+        return RT_UNKNOWN_ERROR;
     glfwSwapInterval(1);
 
     int flags;
@@ -177,6 +166,10 @@ int main() {
         glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, nullptr, GL_TRUE);
     }
 
+    rt_window window = api->GetWindow();
+    if (window.type != RT_WINDOW_TYPE_GLFW)
+        return RT_INVALID_VALUE;
+
     // Setup Dear ImGui context
     IMGUI_CHECKVERSION();
     ImGui::CreateContext();
@ -185,52 +178,61 @@ int main() {
|
|||||||
io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable Keyboard Controls
|
io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable Keyboard Controls
|
||||||
io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad; // Enable Gamepad Controls
|
io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad; // Enable Gamepad Controls
|
||||||
ImGui::StyleColorsDark();
|
ImGui::StyleColorsDark();
|
||||||
ImGui_ImplGlfw_InitForOpenGL(window, true);
|
ImGui_ImplGlfw_InitForOpenGL(window.glfw, true);
|
||||||
ImGui_ImplOpenGL3_Init("#version 130");
|
ImGui_ImplOpenGL3_Init("#version 130");
|
||||||
|
|
||||||
file_picker picker;
|
void *game_obj = api->AllocGameObject(sizeof(meshlets_exp));
|
||||||
meshlet_generator gen;
|
meshlets_exp *exp = new (game_obj) meshlets_exp;
|
||||||
meshlet_renderer ren;
|
exp->ren.Initialize();
|
||||||
ren.Initialize();
|
|
||||||
|
|
||||||
bool flat = true;
|
return RT_SUCCESS;
|
||||||
uint32_t root_idx;
|
}
|
||||||
|
|
||||||
while (!glfwWindowShouldClose(window)) {
|
void MeshletsShutdown(rt_launcher_api *api, void *game_obj) {}
|
||||||
glfwPollEvents();
|
|
||||||
ImGui_ImplOpenGL3_NewFrame();
|
|
||||||
ImGui_ImplGlfw_NewFrame();
|
|
||||||
ImGui::NewFrame();
|
|
||||||
|
|
||||||
if (picker.Run()) {
|
void MeshletsUpdate(rt_launcher_api *api, rt_time_delta delta, void *game_obj) {
|
||||||
gen.LoadObj(picker.GetPicked());
|
ImGui_ImplOpenGL3_NewFrame();
|
||||||
flat = picker.m_flat;
|
ImGui_ImplGlfw_NewFrame();
|
||||||
if (flat)
|
ImGui::NewFrame();
|
||||||
gen.RunFlat();
|
|
||||||
else
|
|
||||||
gen.RunHierarchical(0,&root_idx);
|
|
||||||
}
|
|
||||||
ren.SettingMenu();
|
|
||||||
|
|
||||||
ImGui::Render();
|
meshlets_exp *exp = (meshlets_exp *)game_obj;
|
||||||
int display_w, display_h;
|
if (exp->picker.RunFlat()) {
|
||||||
glfwGetFramebufferSize(window, &display_w, &display_h);
|
exp->gen.LoadObj(exp->picker.GetPicked());
|
||||||
glViewport(0, 0, display_w, display_h);
|
exp->gen.RunFlat();
|
||||||
glClearColor(0.f, 0.f, 0.f, 1.f);
|
}
|
||||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
exp->ren.SettingMenu();
|
||||||
|
|
||||||
if (picker.HasPickedFile()) {
|
ImGui::Render();
|
||||||
ren.m_aspect = (float)display_w / (float)display_h;
|
}
|
||||||
if (flat)
|
|
||||||
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets);
|
|
||||||
}
|
|
||||||
|
|
||||||
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
|
void MeshletsRender(rt_launcher_api *api, void *game_obj) {
|
||||||
glfwSwapBuffers(window);
|
rt_window window = api->GetWindow();
|
||||||
|
if (window.type != RT_WINDOW_TYPE_GLFW)
|
||||||
|
return;
|
||||||
|
|
||||||
|
meshlets_exp *exp = (meshlets_exp *)game_obj;
|
||||||
|
|
||||||
|
int display_w, display_h;
|
||||||
|
glfwGetFramebufferSize(window.glfw, &display_w, &display_h);
|
||||||
|
glViewport(0, 0, display_w, display_h);
|
||||||
|
glClearColor(0.f, 0.f, 0.f, 1.f);
|
||||||
|
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||||
|
|
||||||
|
if (exp->picker.HasPickedFile()) {
|
||||||
|
exp->ren.m_aspect = (float)display_w / (float)display_h;
|
||||||
|
exp->ren.RenderFlat(exp->gen.m_meshlets, exp->gen.m_num_meshlets);
|
||||||
}
|
}
|
||||||
|
|
||||||
glfwDestroyWindow(window);
|
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
|
||||||
glfwTerminate();
|
}
|
||||||
rtShutdownRuntime();
|
|
||||||
return 0;
|
LOAD_GAME_API_FUNC {
|
||||||
|
return (rt_game_api){
|
||||||
|
.RegisterCVARs = MeshletsRegisterCVARs,
|
||||||
|
.Init = MeshletsInit,
|
||||||
|
.Shutdown = MeshletsShutdown,
|
||||||
|
.Update = MeshletsUpdate,
|
||||||
|
.Render = MeshletsRender,
|
||||||
|
.OnGameObjectFree = NULL,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
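The init path above constructs the C++ experiment object with placement new inside storage owned by the launcher (AllocGameObject), which the launcher later releases with plain free(). A minimal sketch, assuming meshlets_exp ever grows non-trivial members, of the optional OnGameObjectFree hook that would run the destructor first; the function name is illustrative and not part of this change:

// Hypothetical hook, not part of the diff: runs the destructor for the object
// that MeshletsInit constructed via placement new, right before the launcher
// frees the memory it handed out through AllocGameObject.
static void MeshletsOnGameObjectFree(rt_launcher_api *api, void *game_obj) {
    (void)api;
    if (game_obj)
        static_cast<meshlets_exp *>(game_obj)->~meshlets_exp();
}

It would be wired up by setting .OnGameObjectFree = MeshletsOnGameObjectFree in the rt_game_api returned above; with only trivially destructible members, leaving it NULL (as the change does) is fine.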
@@ -1,12 +1,15 @@
 #include <meshoptimizer.h>
 
+#ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 4530)
+#endif
 
 #define TINYOBJLOADER_IMPLEMENTATION
 #include <tiny_obj_loader.h>
+#ifdef _MSC_VER
 #pragma warning(pop)
+#endif
-#include <unordered_map>
-#include <vector>
 
 #include "meshlet_generator.hpp"
 #include <runtime/mem_arena.h>
@@ -121,13 +124,13 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
     rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
 
     size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
-                                                     L0_MESHLET_VERTICES,
-                                                     L0_MESHLET_TRIANGLES);
+                                                     MESHLET_VERTICES,
+                                                     MESHLET_TRIANGLES);
     meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
     unsigned int *meshlet_vertices =
-        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets * L0_MESHLET_VERTICES);
+        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets * MESHLET_VERTICES);
     unsigned char *meshlet_triangles =
-        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets * L0_MESHLET_INDICES);
+        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets * MESHLET_INDICES);
 
     size_t meshlet_count = meshopt_buildMeshlets(meshlets,
                                                  meshlet_vertices,
@@ -137,8 +140,8 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
                                                  &m_meshes[mesh_idx].vertices[0].vx,
                                                  m_meshes[mesh_idx].num_vertices,
                                                  sizeof(vertex),
-                                                 L0_MESHLET_VERTICES,
-                                                 L0_MESHLET_TRIANGLES,
+                                                 MESHLET_VERTICES,
+                                                 MESHLET_TRIANGLES,
                                                  cone_weight);
 
     m_meshlets = new meshlet[meshlet_count];
@@ -158,26 +161,24 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
                                                              m_meshes[mesh_idx].num_vertices,
                                                              sizeof(vertex));
 
-        m_meshlets[i].vertices = new vertex[L0_MESHLET_VERTICES];
-        m_meshlets[i].indices = new uint16_t[L0_MESHLET_INDICES];
+        m_meshlets[i].vertices = new vertex[MESHLET_VERTICES];
+        m_meshlets[i].indices = new uint8_t[MESHLET_INDICES];
         m_meshlets[i].num_vertices = meshlets[i].vertex_count;
         m_meshlets[i].num_indices = meshlets[i].triangle_count * 3;
         memcpy(m_meshlets[i].center, bounds.center, sizeof(bounds.center));
         m_meshlets[i].radius = bounds.radius;
+        memcpy(m_meshlets[i].cone_axis, bounds.cone_axis, sizeof(bounds.cone_axis));
+        m_meshlets[i].cone_cutoff = bounds.cone_cutoff;
+        memcpy(m_meshlets[i].cone_apex, bounds.cone_apex, sizeof(bounds.cone_apex));
 
         for (unsigned int vert_idx = 0; vert_idx < meshlets[i].vertex_count; ++vert_idx) {
            unsigned int vert = meshlet_vertices[meshlets[i].vertex_offset + vert_idx];
            m_meshlets[i].vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
         }
 
-        for (unsigned int tri_idx = 0; tri_idx < meshlets[i].triangle_count; ++tri_idx) {
-            m_meshlets[i].indices[tri_idx * 3 + 0] =
-                (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 0];
-            m_meshlets[i].indices[tri_idx * 3 + 1] =
-                (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 1];
-            m_meshlets[i].indices[tri_idx * 3 + 2] =
-                (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 2];
-        }
+        memcpy(m_meshlets[i].indices,
+               meshlet_triangles + meshlets[i].triangle_offset,
+               meshlets[i].triangle_count * 3);
         m_meshlets[i].num_children = 0u;
     }
 
@ -189,81 +190,49 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
|
|||||||
|
|
||||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||||
|
|
||||||
// Convert meshopt meshlets into our meshlets, each with its own vertex and index buffer
|
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
|
||||||
std::vector<meshlet> meshlets;
|
MESHLET_VERTICES,
|
||||||
{
|
MESHLET_TRIANGLES);
|
||||||
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
|
meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
|
||||||
L0_MESHLET_VERTICES,
|
unsigned int *meshlet_vertices =
|
||||||
L0_MESHLET_TRIANGLES);
|
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES);
|
||||||
meshopt_Meshlet *meshopt_meshlets =
|
unsigned char *meshlet_triangles =
|
||||||
RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
|
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES);
|
||||||
unsigned int *meshlet_vertices =
|
meshopt_Bounds *meshlet_bounds = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Bounds, max_meshlets);
|
||||||
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES);
|
|
||||||
unsigned char *meshlet_triangles =
|
|
||||||
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES);
|
|
||||||
|
|
||||||
uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshopt_meshlets,
|
uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshlets,
|
||||||
meshlet_vertices,
|
meshlet_vertices,
|
||||||
meshlet_triangles,
|
meshlet_triangles,
|
||||||
m_meshes[mesh_idx].indices,
|
m_meshes[mesh_idx].indices,
|
||||||
m_meshes[mesh_idx].num_indices,
|
m_meshes[mesh_idx].num_indices,
|
||||||
&m_meshes[mesh_idx].vertices[0].vx,
|
&m_meshes[mesh_idx].vertices[0].vx,
|
||||||
m_meshes[mesh_idx].num_vertices,
|
m_meshes[mesh_idx].num_vertices,
|
||||||
sizeof(vertex),
|
sizeof(vertex),
|
||||||
L0_MESHLET_VERTICES,
|
MESHLET_VERTICES,
|
||||||
L0_MESHLET_TRIANGLES,
|
MESHLET_TRIANGLES,
|
||||||
cone_weight);
|
cone_weight);
|
||||||
|
|
||||||
for (size_t i = 0; i < meshlet_count; ++i) {
|
for (size_t i = 0; i < meshlet_count; ++i) {
|
||||||
meshopt_optimizeMeshlet(&meshlet_vertices[meshopt_meshlets[i].vertex_offset],
|
meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset],
|
||||||
&meshlet_triangles[meshopt_meshlets[i].triangle_offset],
|
&meshlet_triangles[meshlets[i].triangle_offset],
|
||||||
meshopt_meshlets[i].triangle_count,
|
meshlets[i].triangle_count,
|
||||||
meshopt_meshlets[i].vertex_count);
|
meshlets[i].vertex_count);
|
||||||
|
|
||||||
meshopt_Bounds bounds = meshopt_computeMeshletBounds(
|
meshlet_bounds[i] =
|
||||||
&meshlet_vertices[meshopt_meshlets[i].vertex_offset],
|
meshopt_computeMeshletBounds(&meshlet_vertices[meshlets[i].vertex_offset],
|
||||||
&meshlet_triangles[meshopt_meshlets[i].triangle_offset],
|
&meshlet_triangles[meshlets[i].triangle_offset],
|
||||||
meshopt_meshlets[i].triangle_count,
|
meshlets[i].triangle_count,
|
||||||
&m_meshes[mesh_idx].vertices[0].vx,
|
&m_meshes[mesh_idx].vertices[0].vx,
|
||||||
m_meshes[mesh_idx].num_vertices,
|
m_meshes[mesh_idx].num_vertices,
|
||||||
sizeof(vertex));
|
sizeof(vertex));
|
||||||
|
|
||||||
meshlet meshlet;
|
|
||||||
meshlet.vertices = new vertex[L0_MESHLET_VERTICES];
|
|
||||||
meshlet.indices = new uint16_t[L0_MESHLET_INDICES];
|
|
||||||
meshlet.num_vertices = meshopt_meshlets[i].vertex_count;
|
|
||||||
meshlet.num_indices = meshopt_meshlets[i].triangle_count * 3;
|
|
||||||
memcpy(meshlet.center, bounds.center, sizeof(bounds.center));
|
|
||||||
meshlet.radius = bounds.radius;
|
|
||||||
|
|
||||||
for (unsigned int vert_idx = 0; vert_idx < meshopt_meshlets[i].vertex_count;
|
|
||||||
++vert_idx) {
|
|
||||||
unsigned int vert = meshlet_vertices[meshopt_meshlets[i].vertex_offset + vert_idx];
|
|
||||||
meshlet.vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned int tri_idx = 0; tri_idx < meshopt_meshlets[i].triangle_count;
|
|
||||||
++tri_idx) {
|
|
||||||
meshlet.indices[tri_idx * 3 + 0] = (uint16_t)
|
|
||||||
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 0];
|
|
||||||
meshlet.indices[tri_idx * 3 + 1] = (uint16_t)
|
|
||||||
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 1];
|
|
||||||
meshlet.indices[tri_idx * 3 + 2] = (uint16_t)
|
|
||||||
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 2];
|
|
||||||
}
|
|
||||||
|
|
||||||
meshlet.num_children = 0u;
|
|
||||||
|
|
||||||
meshlets.push_back(meshlet);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We now have a flat list of meshlets -> the highest lod ones
|
// We now have a flat list of meshlets -> the highest lod ones
|
||||||
// We now combine (up to 8) meshlets into one to generate the next hierarchy level
|
// We now combine (up to 8) meshlets into one to generate the next hierarchy level
|
||||||
// Repeat until we only have 1 meshlet left
|
// Repeat until we only have 1 meshlet left
|
||||||
std::vector<uint32_t> unprocessed;
|
std::vector<uint32_t> unprocessed;
|
||||||
unprocessed.reserve(meshlets.size());
|
unprocessed.reserve(meshlet_count);
|
||||||
for (uint32_t i = 0; i < meshlets.size(); ++i) {
|
for (uint32_t i = 0; i < meshlet_count; ++i) {
|
||||||
unprocessed.push_back(i);
|
unprocessed.push_back(i);
|
||||||
}
|
}
|
||||||
std::vector<uint32_t> next_level;
|
std::vector<uint32_t> next_level;
|
||||||
@ -278,11 +247,11 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
|
|||||||
unsigned int closest_count = 0u;
|
unsigned int closest_count = 0u;
|
||||||
|
|
||||||
float first_center[3];
|
float first_center[3];
|
||||||
memcpy(first_center, meshlets[first].center, sizeof(float) * 3);
|
memcpy(first_center, meshlet_bounds[first].center, sizeof(float) * 3);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < unprocessed.size(); ++i) {
|
for (uint32_t i = 0; i < unprocessed.size(); ++i) {
|
||||||
float center[3];
|
float center[3];
|
||||||
memcpy(center, meshlets[unprocessed[i]].center, sizeof(float) * 3);
|
memcpy(center, meshlet_bounds[unprocessed[i]].center, sizeof(float) * 3);
|
||||||
|
|
||||||
float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) +
|
float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) +
|
||||||
(center[1] - first_center[1]) * (center[1] - first_center[1]) +
|
(center[1] - first_center[1]) * (center[1] - first_center[1]) +
|
||||||
@ -292,7 +261,7 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
|
|||||||
// Check if we are closer than one of the other candidates
|
// Check if we are closer than one of the other candidates
|
||||||
for (unsigned int j = 0; j < closest_count; ++j) {
|
for (unsigned int j = 0; j < closest_count; ++j) {
|
||||||
uint32_t highest_idx = UINT_MAX;
|
uint32_t highest_idx = UINT_MAX;
|
||||||
float highest_dist = dist;
|
float highest_dist = dist;
|
||||||
if (dist < distances[j]) {
|
if (dist < distances[j]) {
|
||||||
if (distances[j] > highest_dist) {
|
if (distances[j] > highest_dist) {
|
||||||
highest_dist = distances[j];
|
highest_dist = distances[j];
|
||||||
@ -300,14 +269,14 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (highest_idx < RT_ARRAY_COUNT(closest)) {
|
if (highest_idx < RT_ARRAY_COUNT(closest)) {
|
||||||
const uint32_t replaced = closest[highest_idx];
|
const uint32_t replaced = highest_idx;
|
||||||
distances[j] = dist;
|
distances[j] = dist;
|
||||||
closest[j] = unprocessed[i];
|
closest[j] = i;
|
||||||
unprocessed.push_back(replaced);
|
unprocessed.push_back(replaced);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
closest[closest_count] = unprocessed[i];
|
closest[closest_count] = i;
|
||||||
distances[closest_count] = dist;
|
distances[closest_count] = dist;
|
||||||
closest_count++;
|
closest_count++;
|
||||||
unprocessed.erase(unprocessed.begin() + i);
|
unprocessed.erase(unprocessed.begin() + i);
|
||||||
@ -316,81 +285,6 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
|
|||||||
|
|
||||||
// Combine into a new meshlet
|
// Combine into a new meshlet
|
||||||
// vertex *vertices = new vertex[MESHLET_VERTICES * 8];
|
// vertex *vertices = new vertex[MESHLET_VERTICES * 8];
|
||||||
|
|
||||||
vertex *in_vertices = new vertex[L0_MESHLET_VERTICES * 8];
|
|
||||||
uint32_t *in_indices = new uint32_t[L0_MESHLET_INDICES * 8];
|
|
||||||
memcpy(in_vertices,
|
|
||||||
meshlets[first].vertices,
|
|
||||||
sizeof(vertex) * meshlets[first].num_vertices);
|
|
||||||
size_t at_vert = meshlets[first].num_vertices;
|
|
||||||
for (unsigned int i = 0; i < meshlets[first].num_indices; ++i)
|
|
||||||
in_indices[i] = static_cast<uint32_t>(meshlets[first].indices[i]);
|
|
||||||
size_t at_idx = meshlets[first].num_indices;
|
|
||||||
for (unsigned int i = 0; i < closest_count; ++i) {
|
|
||||||
memcpy(&in_vertices[at_vert],
|
|
||||||
meshlets[closest[i]].vertices,
|
|
||||||
sizeof(vertex) * meshlets[closest[i]].num_vertices);
|
|
||||||
at_vert += meshlets[closest[i]].num_vertices;
|
|
||||||
|
|
||||||
for (unsigned int j = 0; j < meshlets[closest[i]].num_indices; ++j)
|
|
||||||
in_indices[at_idx + j] = static_cast<uint32_t>(meshlets[closest[i]].indices[j]);
|
|
||||||
at_idx += meshlets[closest[i]].num_indices;
|
|
||||||
}
|
|
||||||
|
|
||||||
const float target_error =
|
|
||||||
0.10f; // Acept 10% error. We are simplifying meshlets, the extents are not very large
|
|
||||||
const size_t target_index_count = L0_MESHLET_INDICES;
|
|
||||||
const unsigned int simplify_options = meshopt_SimplifyLockBorder;
|
|
||||||
const float threshold = 1.f / 8.f;
|
|
||||||
std::vector<unsigned int> lod(UINT16_MAX);
|
|
||||||
float lod_error = 0.f;
|
|
||||||
size_t out_index_count = meshopt_simplify(&lod[0],
|
|
||||||
in_indices,
|
|
||||||
at_idx,
|
|
||||||
&in_vertices[0].vx,
|
|
||||||
at_vert,
|
|
||||||
sizeof(vertex),
|
|
||||||
target_index_count,
|
|
||||||
target_error,
|
|
||||||
simplify_options,
|
|
||||||
&lod_error);
|
|
||||||
lod.resize(out_index_count);
|
|
||||||
if (out_index_count < UINT16_MAX) {
|
|
||||||
rtLog("EXP", "Yay");
|
|
||||||
} else {
|
|
||||||
rtReportError("EXP", "Nay");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unordered_map<unsigned int, uint16_t> index_remap;
|
|
||||||
uint16_t *index_buffer = new uint16_t[out_index_count];
|
|
||||||
std::vector<vertex> merged_vertices;
|
|
||||||
merged_vertices.reserve(out_index_count);
|
|
||||||
for (size_t i = 0; i < out_index_count; ++i) {
|
|
||||||
if (index_remap.find(lod[i]) != index_remap.end()) {
|
|
||||||
index_buffer[i] = index_remap[lod[i]];
|
|
||||||
} else {
|
|
||||||
uint16_t index = (uint16_t)merged_vertices.size();
|
|
||||||
merged_vertices.push_back(in_vertices[lod[i]]);
|
|
||||||
index_buffer[i] = index;
|
|
||||||
index_remap[lod[i]] = index;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
meshlet merged = {};
|
|
||||||
merged.num_children = closest_count + 1;
|
|
||||||
merged.children[0] = first;
|
|
||||||
memcpy(merged.children, closest, sizeof(uint32_t) * closest_count);
|
|
||||||
merged.indices = index_buffer;
|
|
||||||
merged.num_indices = (uint32_t)out_index_count;
|
|
||||||
merged.vertices = new vertex[merged_vertices.size()];
|
|
||||||
merged.num_vertices = (uint32_t)merged_vertices.size();
|
|
||||||
memcpy(merged.vertices, merged_vertices.data(), sizeof(vertex) * merged_vertices.size());
|
|
||||||
|
|
||||||
// Determine center and bounds
|
|
||||||
|
|
||||||
|
|
||||||
meshlets.push_back(merged);
|
|
||||||
next_level.push_back((unsigned int)meshlets.size() - 1);
|
|
||||||
}
|
}
|
||||||
return RT_SUCCESS;
|
return RT_SUCCESS;
|
||||||
}
|
}
|
@@ -3,9 +3,9 @@
 #include "runtime/runtime.h"
 
-constexpr size_t L0_MESHLET_VERTICES = 64;
-constexpr size_t L0_MESHLET_TRIANGLES = 124;
-constexpr size_t L0_MESHLET_INDICES = L0_MESHLET_TRIANGLES * 3;
+constexpr size_t MESHLET_VERTICES = 64;
+constexpr size_t MESHLET_TRIANGLES = 124;
+constexpr size_t MESHLET_INDICES = MESHLET_TRIANGLES * 3;
 
 struct vertex {
     float vx, vy, vz;
@@ -13,13 +13,18 @@ struct vertex {
 
 struct meshlet {
     vertex *vertices;
-    uint16_t *indices;
+    uint8_t *indices;
     uint32_t num_vertices;
     uint32_t num_indices;
 
     float center[3];
     float radius;
 
+    // Normal cone for backface culling
+    float cone_apex[3];
+    float cone_axis[3];
+    float cone_cutoff;
+
     // child indices
     uint32_t children[8];
     uint32_t num_children;
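The new cone_apex, cone_axis and cone_cutoff members mirror what meshopt_computeMeshletBounds returns and are what makes per-meshlet backface cone culling possible. A minimal CPU-side sketch of the standard cone test these fields enable (assumed usage only; in this change the data is packed into gpu_meshlet_data and evaluated in flat_cull.glsl):

#include <cmath>

// Returns true when every triangle in the meshlet faces away from the camera,
// i.e. the direction from the camera to the cone apex lies inside the normal cone.
static bool MeshletConeCulled(const meshlet &m, const float cam_pos[3]) {
    float d[3] = {m.cone_apex[0] - cam_pos[0],
                  m.cone_apex[1] - cam_pos[1],
                  m.cone_apex[2] - cam_pos[2]};
    float len = std::sqrt(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
    if (len == 0.f)
        return false; // camera sits on the apex, keep the meshlet
    float cos_angle = (d[0] * m.cone_axis[0] + d[1] * m.cone_axis[1] + d[2] * m.cone_axis[2]) / len;
    return cos_angle >= m.cone_cutoff;
}

The switch from uint16_t to uint8_t indices works because meshlet indices address at most MESHLET_VERTICES (64) local vertices, so a byte per index is enough.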
@@ -98,7 +98,7 @@ shader shader::CompileFile(const char *compute_path) {
     fclose(f);
 
     sh = CompileSource(buf);
-    delete buf;
+    delete[] buf;
     return sh;
 }
 
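The hunk above fixes a scalar/array deallocation mismatch: the source buffer is created with the array form of new, so it has to be released with the array form of delete. Illustrative pairing (the size variable is a hypothetical stand-in for the local in CompileFile):

char *buf = new char[source_size + 1]; // allocated with new[] ...
// ... read the file and hand it to the compiler ...
delete[] buf;                          // ... so it must be freed with delete[]; plain `delete buf` is undefined behavior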
@@ -108,6 +108,8 @@ struct gpu_mesh_data {
 
 struct gpu_meshlet_data {
     float bounds[4]; // xyz, radius
+    float cone_apex[4]; // xyz, unused
+    float cone_axis_cutoff[4];
     uint32_t mesh_index;
     uint32_t first_index;
     uint32_t base_vertex;
@ -174,58 +176,9 @@ rt_result meshlet_renderer::Initialize() {
|
|||||||
m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag);
|
m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag);
|
||||||
m_flat_cull_shader = shader::CompileFile(_flat_cull);
|
m_flat_cull_shader = shader::CompileFile(_flat_cull);
|
||||||
m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag);
|
m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag);
|
||||||
|
|
||||||
InitFlat();
|
|
||||||
|
|
||||||
return RT_SUCCESS;
|
return RT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OnModelLoaded(meshlet_generator *gen);
|
|
||||||
|
|
||||||
void meshlet_renderer::InitFlat(void) {
|
|
||||||
const size_t MAX_MESHLETS = 65556;
|
|
||||||
|
|
||||||
GLuint buffers[6];
|
|
||||||
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
|
|
||||||
m_flat.vbo = buffers[0];
|
|
||||||
m_flat.ebo = buffers[1];
|
|
||||||
m_flat.meshlet_ssbo = buffers[2];
|
|
||||||
m_flat.draw_ssbo = buffers[3];
|
|
||||||
m_flat.cull_ssbo = buffers[4];
|
|
||||||
m_flat.mesh_ssbo = buffers[5];
|
|
||||||
|
|
||||||
// Create the vao
|
|
||||||
glGenVertexArrays(1, &m_flat.vao);
|
|
||||||
glBindVertexArray(m_flat.vao);
|
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, m_flat.vbo);
|
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_flat.ebo);
|
|
||||||
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
|
|
||||||
glEnableVertexAttribArray(0);
|
|
||||||
glBindVertexArray(0);
|
|
||||||
|
|
||||||
// Reserve space
|
|
||||||
glNamedBufferStorage(m_flat.vbo,
|
|
||||||
MAX_MESHLETS * sizeof(vertex) * L0_MESHLET_VERTICES,
|
|
||||||
nullptr,
|
|
||||||
GL_DYNAMIC_STORAGE_BIT);
|
|
||||||
glNamedBufferStorage(m_flat.ebo,
|
|
||||||
MAX_MESHLETS * sizeof(uint16_t) * L0_MESHLET_INDICES,
|
|
||||||
nullptr,
|
|
||||||
GL_DYNAMIC_STORAGE_BIT);
|
|
||||||
glNamedBufferStorage(m_flat.meshlet_ssbo,
|
|
||||||
MAX_MESHLETS * sizeof(gpu_meshlet_data),
|
|
||||||
nullptr,
|
|
||||||
GL_DYNAMIC_STORAGE_BIT);
|
|
||||||
glNamedBufferStorage(m_flat.draw_ssbo,
|
|
||||||
MAX_MESHLETS * sizeof(draw_elements_indirect_command),
|
|
||||||
nullptr,
|
|
||||||
0);
|
|
||||||
glNamedBufferStorage(m_flat.cull_ssbo, sizeof(cull_output), nullptr, GL_MAP_READ_BIT | GL_DYNAMIC_STORAGE_BIT);
|
|
||||||
|
|
||||||
glNamedBufferStorage(m_flat.mesh_ssbo, sizeof(gpu_mesh_data), nullptr, GL_DYNAMIC_STORAGE_BIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) {
|
void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) {
|
||||||
if (m_settings.separate_rendering) {
|
if (m_settings.separate_rendering) {
|
||||||
SeparateRendering(meshlets, count);
|
SeparateRendering(meshlets, count);
|
||||||
@ -240,56 +193,88 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
|
|||||||
// Do compute "culling" (generate drawindirect) into 1 ssbo
|
// Do compute "culling" (generate drawindirect) into 1 ssbo
|
||||||
// DrawIndirect
|
// DrawIndirect
|
||||||
|
|
||||||
// Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES
|
GLuint vbo, ebo, meshlet_ssbo, draw_ssbo, cull_ssbo, mesh_ssbo;
|
||||||
|
GLuint buffers[6];
|
||||||
|
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
|
||||||
|
vbo = buffers[0];
|
||||||
|
ebo = buffers[1];
|
||||||
|
meshlet_ssbo = buffers[2];
|
||||||
|
draw_ssbo = buffers[3];
|
||||||
|
cull_ssbo = buffers[4];
|
||||||
|
mesh_ssbo = buffers[5];
|
||||||
|
|
||||||
|
// Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES
|
||||||
|
glNamedBufferStorage(vbo,
|
||||||
|
count * sizeof(vertex) * MESHLET_VERTICES,
|
||||||
|
nullptr,
|
||||||
|
GL_DYNAMIC_STORAGE_BIT);
|
||||||
|
glNamedBufferStorage(ebo, count * MESHLET_INDICES, nullptr, GL_DYNAMIC_STORAGE_BIT);
|
||||||
for (unsigned int i = 0; i < count; ++i) {
|
for (unsigned int i = 0; i < count; ++i) {
|
||||||
glNamedBufferSubData(m_flat.vbo,
|
glNamedBufferSubData(vbo,
|
||||||
i * sizeof(vertex) * L0_MESHLET_VERTICES,
|
i * sizeof(vertex) * MESHLET_VERTICES,
|
||||||
meshlets[i].num_vertices * sizeof(vertex),
|
meshlets[i].num_vertices * sizeof(vertex),
|
||||||
meshlets[i].vertices);
|
meshlets[i].vertices);
|
||||||
glNamedBufferSubData(m_flat.ebo,
|
glNamedBufferSubData(ebo,
|
||||||
i * L0_MESHLET_INDICES * sizeof(uint16_t),
|
i * MESHLET_INDICES,
|
||||||
meshlets[i].num_indices * sizeof(uint16_t),
|
meshlets[i].num_indices,
|
||||||
meshlets[i].indices);
|
meshlets[i].indices);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store meshlet information
|
// Store meshlet information
|
||||||
|
glNamedBufferStorage(meshlet_ssbo,
|
||||||
|
count * sizeof(gpu_meshlet_data),
|
||||||
|
nullptr,
|
||||||
|
GL_DYNAMIC_STORAGE_BIT);
|
||||||
for (unsigned int i = 0; i < count; ++i) {
|
for (unsigned int i = 0; i < count; ++i) {
|
||||||
gpu_meshlet_data meshlet;
|
gpu_meshlet_data meshlet;
|
||||||
// Only have one right now
|
// Only have one right now
|
||||||
meshlet.mesh_index = 0;
|
meshlet.mesh_index = 0;
|
||||||
meshlet.first_index = i * L0_MESHLET_INDICES;
|
meshlet.first_index = i * MESHLET_INDICES;
|
||||||
meshlet.base_vertex = i * L0_MESHLET_VERTICES;
|
meshlet.base_vertex = i * MESHLET_VERTICES;
|
||||||
meshlet.index_count = meshlets[i].num_indices;
|
meshlet.index_count = meshlets[i].num_indices;
|
||||||
memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float));
|
memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float));
|
||||||
meshlet.bounds[3] = meshlets[i].radius;
|
meshlet.bounds[3] = meshlets[i].radius;
|
||||||
glNamedBufferSubData(m_flat.meshlet_ssbo,
|
memcpy(meshlet.cone_apex, meshlets[i].cone_apex, 3 * sizeof(float));
|
||||||
|
meshlet.cone_apex[3] = 0.f;
|
||||||
|
memcpy(meshlet.cone_axis_cutoff, meshlets[i].cone_axis, sizeof(meshlets[i].cone_axis));
|
||||||
|
glNamedBufferSubData(meshlet_ssbo,
|
||||||
i * sizeof(gpu_meshlet_data),
|
i * sizeof(gpu_meshlet_data),
|
||||||
sizeof(gpu_meshlet_data),
|
sizeof(gpu_meshlet_data),
|
||||||
&meshlet);
|
&meshlet);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reserve space for the draw commands
|
||||||
|
glNamedBufferStorage(draw_ssbo, count * sizeof(draw_elements_indirect_command), nullptr, 0);
|
||||||
|
|
||||||
// Prepare culling output
|
// Prepare culling output
|
||||||
cull_output cull_output = {0};
|
cull_output cull_output = {0};
|
||||||
glNamedBufferSubData(m_flat.cull_ssbo, 0, sizeof(cull_output), &cull_output);
|
glNamedBufferStorage(cull_ssbo, sizeof(cull_output), &cull_output, GL_MAP_READ_BIT);
|
||||||
|
|
||||||
// Prepare mesh data
|
// Prepare mesh data
|
||||||
gpu_mesh_data mesh_data;
|
gpu_mesh_data mesh_data;
|
||||||
mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
|
mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
|
||||||
glm::scale(glm::vec3(m_settings.scale));
|
glm::scale(glm::vec3(m_settings.scale));
|
||||||
glNamedBufferSubData(m_flat.mesh_ssbo, 0, sizeof(mesh_data), &mesh_data);
|
glNamedBufferStorage(mesh_ssbo, sizeof(mesh_data), &mesh_data, GL_DYNAMIC_STORAGE_BIT);
|
||||||
|
|
||||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
|
|
||||||
// Do culling. TODO: Get number of draws back
|
// Do culling. TODO: Get number of draws back
|
||||||
m_flat_cull_shader.Use();
|
m_flat_cull_shader.Use();
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_flat.meshlet_ssbo);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, meshlet_ssbo);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_flat.draw_ssbo);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, draw_ssbo);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_flat.cull_ssbo);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cull_ssbo);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_flat.mesh_ssbo);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, mesh_ssbo);
|
||||||
glUniform3fv(0, 1, m_settings.eye);
|
glUniform3fv(0, 1, m_settings.eye);
|
||||||
glDispatchCompute(count, 1, 1);
|
glDispatchCompute(count, 1, 1);
|
||||||
|
|
||||||
|
// Create the vao
|
||||||
|
GLuint vao;
|
||||||
|
glGenVertexArrays(1, &vao);
|
||||||
|
glBindVertexArray(vao);
|
||||||
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||||
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
||||||
|
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
|
||||||
|
glEnableVertexAttribArray(0);
|
||||||
|
|
||||||
// DrawIndirect
|
// DrawIndirect
|
||||||
GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp");
|
GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp");
|
||||||
@ -310,20 +295,21 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
|
|||||||
m_meshlet_shader.Use();
|
m_meshlet_shader.Use();
|
||||||
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
|
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
|
||||||
|
|
||||||
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_flat.draw_ssbo);
|
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, draw_ssbo);
|
||||||
glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
|
glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
{
|
{
|
||||||
void *out = glMapNamedBuffer(m_flat.cull_ssbo, GL_READ_ONLY);
|
void *out = glMapNamedBuffer(cull_ssbo, GL_READ_ONLY);
|
||||||
memcpy(&cull_output, out, sizeof(cull_output));
|
memcpy(&cull_output, out, sizeof(cull_output));
|
||||||
glUnmapNamedBuffer(m_flat.cull_ssbo);
|
glUnmapNamedBuffer(cull_ssbo);
|
||||||
}
|
}
|
||||||
|
|
||||||
glBindVertexArray(m_flat.vao);
|
|
||||||
glMultiDrawElementsIndirect(GL_TRIANGLES,
|
glMultiDrawElementsIndirect(GL_TRIANGLES,
|
||||||
GL_UNSIGNED_SHORT,
|
GL_UNSIGNED_BYTE,
|
||||||
nullptr,
|
nullptr,
|
||||||
cull_output.draw_count,
|
cull_output.draw_count,
|
||||||
sizeof(draw_elements_indirect_command));
|
sizeof(draw_elements_indirect_command));
|
||||||
|
|
||||||
|
glDeleteBuffers(RT_ARRAY_COUNT(buffers), buffers);
|
||||||
}
|
}
|
||||||
|
|
||||||
void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) {
|
void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) {
|
||||||
@ -334,9 +320,9 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
|
|||||||
|
|
||||||
glBindVertexArray(vao);
|
glBindVertexArray(vao);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||||
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * L0_MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
|
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
||||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint16_t) * L0_MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
|
glBufferData(GL_ELEMENT_ARRAY_BUFFER, MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
|
||||||
|
|
||||||
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
|
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
|
||||||
glEnableVertexAttribArray(0);
|
glEnableVertexAttribArray(0);
|
||||||
@ -375,11 +361,11 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
|
|||||||
meshlets[i].vertices);
|
meshlets[i].vertices);
|
||||||
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,
|
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,
|
||||||
0,
|
0,
|
||||||
sizeof(uint16_t) * meshlets[i].num_indices,
|
sizeof(uint8_t) * meshlets[i].num_indices,
|
||||||
meshlets[i].indices);
|
meshlets[i].indices);
|
||||||
glUniform3fv(colorloc, 1, colors[i % 5]);
|
glUniform3fv(colorloc, 1, colors[i % 5]);
|
||||||
|
|
||||||
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_SHORT, nullptr);
|
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_BYTE, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
glDeleteVertexArrays(1, &vao);
|
glDeleteVertexArrays(1, &vao);
|
||||||
|
@@ -24,7 +24,6 @@ struct shader {
 };
 
 struct meshlet_renderer {
-  public:
     struct settings {
         bool separate_rendering = true;
         float scale = 1.f;
@@ -34,18 +33,6 @@ struct meshlet_renderer {
         float fov;
     };
 
-  private:
-    struct flat_state {
-        GLuint vao;
-        GLuint vbo;
-        GLuint ebo;
-        GLuint meshlet_ssbo;
-        GLuint draw_ssbo;
-        GLuint cull_ssbo;
-        GLuint mesh_ssbo;
-    };
-
-  public:
     rt_result Initialize();
 
     void RenderFlat(const meshlet *meshlets, unsigned int count);
@@ -53,21 +40,16 @@ struct meshlet_renderer {
     void SettingMenu();
 
   private:
-    void InitFlat(void);
-
     void SeparateRendering(const meshlet *meshlets, unsigned int count);
     void DrawIndirectFlat(const meshlet *meshlets, unsigned int count);
 
   public:
     settings m_settings;
-    float m_aspect;
 
-  private:
-    flat_state m_flat;
-
     shader m_single_meshlet_shader;
     shader m_flat_cull_shader;
     shader m_meshlet_shader;
+    float m_aspect;
 };
 
 #endif
@@ -1,11 +1,9 @@
-glfw_proj = subproject('glfw', default_options: ['default_library=static', 'b_sanitize=none'])
-glfw_dep = glfw_proj.get_variable('glfw_dep')
-imgui_proj = subproject('imgui')
+imgui_proj = subproject('imgui', default_options: ['warning_level=0', 'werror=false'])
 imgui_dep = imgui_proj.get_variable('imgui_dep')
 glm_proj = subproject('glm')
 glm_dep = glm_proj.get_variable('glm_dep')
 
-executable('meshlet_experiment',
+shared_library('meshlet_experiment',
     'main.cpp',
     'meshlet_generator.hpp',
     'meshlet_generator.cpp',
@@ -14,5 +12,5 @@ executable('meshlet_experiment',
     contrib_dir / 'glad/glad.c',
     extra_files: ['flat_cull.glsl', 'hierarchical_cull.glsl'],
     include_directories: [engine_incdir, contrib_incdir],
-    dependencies: [m_dep, meshoptimizer_dep, glfw_dep, imgui_dep, glm_dep],
+    dependencies: [m_dep, meshoptimizer_dep, imgui_dep, glfw_dep, glm_dep],
     link_with: runtime_lib)
src/launcher/game_api.h (new file, 82 lines)
@@ -0,0 +1,82 @@
+#ifndef RT_LAUNCHER_GAME_API_H
+#define RT_LAUNCHER_GAME_API_H
+
+#include <runtime/runtime.h>
+#include <runtime/timing.h>
+
+/* Functions offered by the launcher */
+
+struct GLFWwindow;
+
+typedef enum {
+    RT_WINDOW_TYPE_GLFW,
+} rt_window_type;
+
+typedef struct {
+    rt_window_type type;
+    union {
+        GLFWwindow *glfw;
+    };
+} rt_window;
+
+typedef rt_window rt_launcher_get_window_fn(void);
+typedef void *rt_launcher_alloc_game_object_fn(size_t sz);
+typedef void *rt_launcher_get_game_object_fn(void);
+
+typedef struct {
+    /* Returns the window used by the game */
+    rt_launcher_get_window_fn *GetWindow;
+
+    /* Allocates storage for the "game" object passed to the game functions. */
+    rt_launcher_alloc_game_object_fn *AllocGameObject;
+
+    /* Returns the object allocated via AllocGameObject */
+    rt_launcher_get_game_object_fn *GetGameObject;
+} rt_launcher_api;
+
+/* Functions called by the launcher */
+
+typedef void rt_game_register_cvars_fn(rt_launcher_api *api);
+typedef rt_result rt_game_initialize_fn(rt_launcher_api *api);
+typedef void rt_game_shutdown_fn(rt_launcher_api *api, void *game_obj);
+typedef void rt_game_update_fn(rt_launcher_api *api, rt_time_delta delta, void *game_obj);
+typedef void rt_game_render_fn(rt_launcher_api *api, void *game_obj);
+typedef void rt_game_on_game_object_free_fn(rt_launcher_api *api, void *game_obj);
+typedef void rt_game_on_reload_fn(rt_launcher_api *api, void *game_obj);
+typedef void rt_game_on_unload_fn(rt_launcher_api *api, void *game_obj);
+
+typedef struct {
+    /* Called before initialization and before configs are read */
+    rt_game_register_cvars_fn *RegisterCVARs;
+
+    /* Called before entering the game-loop.
+     * It is expected that this is where the game object is created. */
+    rt_game_initialize_fn *Init;
+
+    /* Called after exiting the game-loop. */
+    rt_game_shutdown_fn *Shutdown;
+
+    /* Update game state */
+    rt_game_update_fn *Update;
+
+    /* Render the game */
+    rt_game_render_fn *Render;
+
+    /* These are optional and may be NULL */
+
+    /* Called by rt_laucher_api::AllocGameObject, if an old object is freed. */
+    rt_game_on_game_object_free_fn *OnGameObjectFree;
+} rt_game_api;
+
+
+/* This is the function retrieved from the game library to get the
+ * game_api struct used by the launcher. */
+typedef rt_game_api rt_load_game_api_fn(void);
+
+#ifdef __cplusplus
+#define LOAD_GAME_API_FUNC extern "C" RT_DLLEXPORT rt_game_api rtLoadGameAPI()
+#else
+#define LOAD_GAME_API_FUNC RT_DLLEXPORT rt_game_api rtLoadGameAPI()
+#endif
+
+#endif
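A hedged sketch of the smallest module that could be built against this header; every name below (the Example* callbacks) is illustrative, the real consumer in this change is the meshlet experiment:

#include "game_api.h"

static void ExampleRegisterCVARs(rt_launcher_api *api) { (void)api; }
static rt_result ExampleInit(rt_launcher_api *api) {
    (void)api; // a real game would call api->AllocGameObject() here and build its state in place
    return RT_SUCCESS;
}
static void ExampleShutdown(rt_launcher_api *api, void *game_obj) { (void)api; (void)game_obj; }
static void ExampleUpdate(rt_launcher_api *api, rt_time_delta delta, void *game_obj) { (void)api; (void)delta; (void)game_obj; }
static void ExampleRender(rt_launcher_api *api, void *game_obj) { (void)api; (void)game_obj; }

LOAD_GAME_API_FUNC {
    rt_game_api api = {};
    api.RegisterCVARs    = ExampleRegisterCVARs;
    api.Init             = ExampleInit;
    api.Shutdown         = ExampleShutdown;
    api.Update           = ExampleUpdate;
    api.Render           = ExampleRender;
    api.OnGameObjectFree = NULL; // optional, may stay NULL
    return api;
}

The launcher resolves rtLoadGameAPI from the game library at startup and, as the new launcher.c below shows, falls back to a built-in null implementation when the library or the symbol is missing.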
src/launcher/launcher.c (new file, 449 lines)
@@ -0,0 +1,449 @@
|
|||||||
|
#include "runtime/file_tab.h"
|
||||||
|
#include "runtime/mem_arena.h"
|
||||||
|
#include "runtime/timing.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define GLFW_EXPOSE_NATIVE_WIN32
|
||||||
|
#elif defined(RT_USE_XLIB)
|
||||||
|
#define GLFW_EXPOSE_NATIVE_X11
|
||||||
|
#elif defined(RT_USE_WAYLAND)
|
||||||
|
#define GLFW_EXPOSE_NATIVE_WAYLAND
|
||||||
|
#endif
|
||||||
|
#define GLFW_INCLUDE_NONE
|
||||||
|
#include <GLFW/glfw3.h>
|
||||||
|
#include <GLFW/glfw3native.h>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#include <windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <runtime/runtime.h>
|
||||||
|
#include <runtime/config.h>
|
||||||
|
#include <runtime/dynamic_libs.h>
|
||||||
|
#include <renderer/renderer.h>
|
||||||
|
#include <runtime/fsutils.h>
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "game_api.h"
|
||||||
|
|
||||||
|
/* This is a launcher cvar, because launcher must configure this _before_ the renderer is initialized. */
|
||||||
|
RT_CVAR_S(l_Renderer, "The used renderer. Available options: vk, dx11. (Default: vk)", "vk");
|
||||||
|
|
||||||
|
RT_CVAR_S(l_WindowTitle, "The title used for the game window. (Default: rtengine)", "rtengine");
|
||||||
|
RT_CVAR_I(l_WindowWidth, "The window width. (Default: 1024)", 1024);
|
||||||
|
RT_CVAR_I(l_WindowHeight, "The window height. (Default: 768)", 768);
|
||||||
|
RT_CVAR_I(l_WindowMode, "The window mode. Available options: 0 (=Windowed), 1 (=Borderless Fullscreen), 2 (=Exclusive Fullscreen) (Default: 0)", 0);
|
||||||
|
RT_CVAR_I(l_FullscreenRefreshRate, "Requested refresh rate for exclusive fullscreen. Set to 0 to use the monitors current setting. (Default: 0)", 0);
|
||||||
|
RT_CVAR_S(l_Monitor, "Name of the monitor on which the window should be created. Leave empty to use the primary monitor. (Default: "")", "");
|
||||||
|
|
||||||
|
RT_CVAR_F(l_Framerate, "Target framerate in FPS. (Default: 60.0)", 60.0);
|
||||||
|
|
||||||
|
/* These are for experiments and debugging */
|
||||||
|
RT_CVAR_I(l_CreateGLContext, "Create an OpenGL context in the launcher. 1: on, 0: off. (Default: 0)", 0);
|
||||||
|
RT_CVAR_I(l_GLContextMajor, "OpenGL Major version. (Default: 4)", 4);
|
||||||
|
RT_CVAR_I(l_GLContextMinor, "OpenGL minor version. (Default: 5)", 5);
|
||||||
|
|
||||||
|
RT_CVAR_S(l_GameLib, "Path to the game library. Only usable in internal builds. (Default: (null))", "(null)");
|
||||||
|
|
||||||
|
enum {
|
||||||
|
WINDOW_MODE_WINDOWED,
|
||||||
|
WINDOW_MODE_BORDERLESS_FULLSCREEN,
|
||||||
|
WINDOW_MODE_FULLSCREEN,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is baked in during compilation to make tampering with it harder.
|
||||||
|
* In debug (internal) builds, this can be overwritten via cvar or command line argument. */
|
||||||
|
#ifndef RT_GAME_LIB_PATH
|
||||||
|
#define RT_GAME_LIB_PATH "(null)"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static rt_dynlib *_game_lib = NULL;
|
||||||
|
static rt_game_api _game;
|
||||||
|
static rt_window _window;
|
||||||
|
static void *_game_obj = NULL;
|
||||||
|
static rt_launcher_api _launcher_api;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
static HINSTANCE _hInstance;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void SetupConfig(void) {
|
||||||
|
rtRegisterCVAR(&l_Renderer);
|
||||||
|
rtRegisterCVAR(&l_WindowTitle);
|
||||||
|
rtRegisterCVAR(&l_WindowWidth);
|
||||||
|
rtRegisterCVAR(&l_WindowHeight);
|
||||||
|
rtRegisterCVAR(&l_WindowMode);
|
||||||
|
rtRegisterCVAR(&l_Monitor);
|
||||||
|
|
||||||
|
rtRegisterCVAR(&l_Framerate);
|
||||||
|
|
||||||
|
rtRegisterCVAR(&l_CreateGLContext);
|
||||||
|
rtRegisterCVAR(&l_GLContextMajor);
|
||||||
|
rtRegisterCVAR(&l_GLContextMinor);
|
||||||
|
|
||||||
|
rtRegisterCVAR(&l_GameLib);
|
||||||
|
|
||||||
|
rt_file_id config_fid = rtAddFile("cfg/launcher.cfg");
|
||||||
|
if (rtProcessConfigFiles(1, &config_fid) != RT_SUCCESS) {
|
||||||
|
rtLog("LAUNCHER", "Processing launcher configs failed.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void LoadGameAndRendererConfig() {
|
||||||
|
rt_file_id renderer_cfg_fid = rtAddFile("cfg/renderer.cfg");
|
||||||
|
rt_file_id game_cfg_fid = rtAddFile("cfg/game.cfg");
|
||||||
|
rt_file_id fids[2] = { renderer_cfg_fid, game_cfg_fid };
|
||||||
|
|
||||||
|
rtProcessConfigFiles(RT_ARRAY_COUNT(fids), fids);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ParseCommandLineCVARs(int argc, char **argv) {
|
||||||
|
for (int i = 1; i < argc - 1; ++i) {
|
||||||
|
const char *name = argv[i];
|
||||||
|
if (name[0] != '-' || name[1] != '-')
|
||||||
|
continue;
|
||||||
|
name = &name[2];
|
||||||
|
rt_cvar *cvar = rtGetCVAR(name);
|
||||||
|
if (!cvar) {
|
||||||
|
++i; /* Skip value */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const char *value = argv[i + 1];
|
||||||
|
|
||||||
|
if (rtSetCVARFromString(cvar, value) != RT_SUCCESS) {
|
||||||
|
rtLog("LAUNCHER", "Failed to set %s to %s. Invalid value?", cvar->name, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip value */
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *ParseCommandLineGameLib(int argc, char **argv) {
|
||||||
|
char *game_lib_cmdline = NULL;
|
||||||
|
#ifdef RT_DEBUG
|
||||||
|
for (int i = 1; i < argc - 1; ++i) {
|
||||||
|
if (strcmp(argv[i], "--game") == 0) {
|
||||||
|
game_lib_cmdline = argv[i + 1];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return game_lib_cmdline;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __NullGame_RegisterCVARs(rt_launcher_api *api) {}
|
||||||
|
static rt_result __NullGame_Init(rt_launcher_api *api) { return RT_SUCCESS; }
|
||||||
|
static void __NullGame_Shutdown(rt_launcher_api *api, void *game_obj) {}
|
||||||
|
static void __NullGame_Update(rt_launcher_api *api, rt_time_delta delta, void *game_obj) { RT_UNUSED(delta); }
|
||||||
|
static void __NullGame_Render(rt_launcher_api *api, void *game_obj) {}
|
||||||
|
|
||||||
|
static rt_game_api _null_api = {
|
||||||
|
.RegisterCVARs = __NullGame_RegisterCVARs,
|
||||||
|
.Init = __NullGame_Init,
|
||||||
|
.Shutdown = __NullGame_Shutdown,
|
||||||
|
.Update = __NullGame_Update,
|
||||||
|
.Render = __NullGame_Render,
|
||||||
|
.OnGameObjectFree = NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
static rt_game_api LoadGame(const char *cmdline_gamelib) {
|
||||||
|
const char *game_lib = RT_GAME_LIB_PATH;
|
||||||
|
#ifdef RT_DEBUG
|
||||||
|
if (strlen(l_GameLib.s) > 0) {
|
||||||
|
game_lib = l_GameLib.s;
|
||||||
|
}
|
||||||
|
if (cmdline_gamelib && strlen(cmdline_gamelib) > 0) {
|
||||||
|
game_lib = cmdline_gamelib;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (strcmp(game_lib, "(null)") != 0) {
|
||||||
|
_game_lib = rtOpenLib(game_lib);
|
||||||
|
if (!_game_lib) {
|
||||||
|
rtReportError("LAUNCHER", "Failed to open game library: %s", game_lib);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
rt_load_game_api_fn *LoadGameAPI = rtGetSymbol(_game_lib, "rtLoadGameAPI");
|
||||||
|
if (!LoadGameAPI) {
|
||||||
|
rtReportError("LAUNCHER", "%s is not a valid game library (rtLoadGameAPI symbol is missing).", game_lib);
|
||||||
|
rtCloseLib(_game_lib);
|
||||||
|
_game_lib = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
return LoadGameAPI();
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
/* Fall back to null implementation. */
|
||||||
|
return _null_api;
|
||||||
|
}
|
||||||
|
|
||||||
|
static rt_window LauncherAPIGetWindow(void) {
|
||||||
|
return _window;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *LauncherAPIAllocGameObj(size_t sz) {
|
||||||
|
if (_game_obj) {
|
||||||
|
/* Free the old one */
|
||||||
|
if (_game.OnGameObjectFree)
|
||||||
|
_game.OnGameObjectFree(&_launcher_api, _game_obj);
|
||||||
|
free(_game_obj);
|
||||||
|
}
|
||||||
|
_game_obj = malloc(sz);
|
||||||
|
return _game_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *LauncherAPIGetGameObject(void) {
|
||||||
|
return _game_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void DisplayMonitors(void) {
|
||||||
|
int count = 0;
|
||||||
|
GLFWmonitor **monitors = glfwGetMonitors(&count);
|
||||||
|
rtLog("LAUNCHER", "Available monitors:");
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
const char *name =glfwGetMonitorName(monitors[i]);
|
||||||
|
if (monitors[i] != glfwGetPrimaryMonitor())
|
||||||
|
rtLog("LAUNCHER", " - %s", name);
|
||||||
|
else
|
||||||
|
rtLog("LAUNCHER", " - %s (Primary)", name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static GLFWmonitor *ChooseMonitor(void) {
|
||||||
|
GLFWmonitor *monitor = glfwGetPrimaryMonitor();
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
if (strcmp(l_Monitor.s, "") == 0) {
|
||||||
|
return monitor;
|
||||||
|
}
|
||||||
|
GLFWmonitor **monitors = glfwGetMonitors(&count);
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
const char *name = glfwGetMonitorName(monitors[i]);
|
||||||
|
if (strcmp(name, l_Monitor.s) == 0)
|
||||||
|
return monitors[i];
|
||||||
|
}
|
||||||
|
return monitor;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void GlfwErrorCB(int err, const char *desc) {
|
||||||
|
rtReportError("GLFW", "GLFW Error %d: %s", err, desc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int Entry(int argc, char **argv) {
|
||||||
|
if (rtInitRuntime() != RT_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
SetupConfig();
|
||||||
|
|
||||||
|
glfwSetErrorCallback(GlfwErrorCB);
|
||||||
|
if (!glfwInit()) {
|
||||||
|
rtShutdownRuntime();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
DisplayMonitors();
|
||||||
|
|
||||||
|
rtRegisterRenderCVARs();
|
||||||
|
|
||||||
|
/* Load the renderer library.
|
||||||
|
* We need it before window creation, to give it an opportunity to register its cvars */
|
||||||
|
if (rtLoadRenderBackend() != RT_SUCCESS) {
|
||||||
|
rtShutdownRuntime();
|
||||||
|
glfwTerminate();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
rtRegisterRenderBackendCVARs();
|
||||||
|
|
||||||
|
_launcher_api = (rt_launcher_api){
|
||||||
|
.GetWindow = LauncherAPIGetWindow,
|
||||||
|
.AllocGameObject = LauncherAPIAllocGameObj,
|
||||||
|
.GetGameObject = LauncherAPIGetGameObject,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Load the game */
|
||||||
|
const char *game_lib_cmdline = ParseCommandLineGameLib(argc, argv);
|
||||||
|
_game = LoadGame(game_lib_cmdline);
|
||||||
|
_game.RegisterCVARs(&_launcher_api);
|
||||||
|
|
||||||
|
LoadGameAndRendererConfig();
|
||||||
|
ParseCommandLineCVARs(argc, argv);
|
||||||
|
|
||||||
|
/* Create the window */
|
||||||
|
GLFWmonitor *monitor = ChooseMonitor();
|
||||||
|
|
||||||
|
GLFWwindow *window = NULL;
|
||||||
|
if (!l_CreateGLContext.i) {
|
||||||
|
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
||||||
|
} else {
|
||||||
|
rtLog("LAUNCHER", "Creating an OpenGL %d.%d context", l_GLContextMajor.i, l_GLContextMinor.i);
|
||||||
|
glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_API);
|
||||||
|
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, l_GLContextMajor.i);
|
||||||
|
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, l_GLContextMinor.i);
|
||||||
|
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
|
||||||
|
}
|
||||||
|
glfwWindowHint(GLFW_FOCUSED, GLFW_TRUE);
|
||||||
|
glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
|
||||||
|
glfwWindowHint(GLFW_CENTER_CURSOR, GLFW_TRUE);
|
||||||
|
|
||||||
|
if (l_WindowMode.i == WINDOW_MODE_BORDERLESS_FULLSCREEN) {
|
||||||
|
const GLFWvidmode *mode = glfwGetVideoMode(glfwGetPrimaryMonitor());
|
||||||
|
glfwWindowHint(GLFW_RED_BITS, mode->redBits);
|
||||||
|
glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits);
|
||||||
|
glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits);
|
||||||
|
glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate);
|
||||||
|
window = glfwCreateWindow(mode->width, mode->height, l_WindowTitle.s, monitor, NULL);
|
||||||
|
}
|
||||||
|
else if (l_WindowMode.i == WINDOW_MODE_FULLSCREEN) {
|
||||||
|
int refresh_rate = l_FullscreenRefreshRate.i;
|
||||||
|
if (refresh_rate == 0) {
|
||||||
|
refresh_rate = glfwGetVideoMode(glfwGetPrimaryMonitor())->refreshRate;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int count;
|
||||||
|
glfwGetVideoModes(NULL, &count);
|
||||||
|
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||||
|
GLFWvidmode *modes = RT_ARENA_PUSH_ARRAY(temp.arena, GLFWvidmode, count);
|
||||||
|
int is_supported = 0;
|
||||||
|
for (int i = 0; i < count; ++i) {
|
||||||
|
if (modes[i].refreshRate == refresh_rate) {
|
||||||
|
is_supported = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rtReturnTemporaryArena(temp);
|
||||||
|
if (!is_supported) {
|
||||||
|
rtLog("LAUNCHER", "Requested refresh rate %d Hz is not supported. Using current setting instead.", refresh_rate);
|
||||||
|
refresh_rate = glfwGetVideoMode(glfwGetPrimaryMonitor())->refreshRate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
glfwWindowHint(GLFW_REFRESH_RATE, refresh_rate);
|
||||||
|
window = glfwCreateWindow(l_WindowWidth.i, l_WindowHeight.i, l_WindowTitle.s, monitor, NULL);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
window = glfwCreateWindow(l_WindowWidth.i, l_WindowHeight.i, l_WindowTitle.s, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!window) {
|
||||||
|
rtReportError("LAUNCHER", "Failed to create the game window.");
|
||||||
|
if (_game_lib)
|
||||||
|
rtCloseLib(_game_lib);
|
||||||
|
glfwTerminate();
|
||||||
|
rtShutdownRuntime();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
_window.type = RT_WINDOW_TYPE_GLFW;
|
||||||
|
_window.glfw = window;
|
||||||
|
|
||||||
|
if (l_CreateGLContext.i) {
|
||||||
|
glfwMakeContextCurrent(window);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize the renderer */
|
||||||
|
rt_renderer_window_info renderer_init_info;
|
||||||
|
#ifdef _WIN32
|
||||||
|
renderer_init_info.hWnd = glfwGetWin32Window(window);
|
||||||
|
renderer_init_info.hInstance = _hInstance;
|
||||||
|
#elif defined(__linux__) && defined(RT_USE_XLIB)
|
||||||
|
renderer_init_info.display = glfwGetX11Display();
|
||||||
|
renderer_init_info.window = glfwGetX11Window(window);
|
||||||
|
#endif
|
||||||
|
renderer_init_info.is_fullscreen = l_WindowMode.i != WINDOW_MODE_WINDOWED;
|
||||||
|
glfwGetFramebufferSize(window, (int*)&renderer_init_info.width, (int*)&renderer_init_info.height);
|
||||||
|
if (rtInitRenderer(&renderer_init_info) != RT_SUCCESS) {
|
||||||
|
rtReportError("LAUNCHER", "Failed to initialize the renderer.");
|
||||||
|
if (_game_lib)
|
||||||
|
rtCloseLib(_game_lib);
|
||||||
|
glfwTerminate();
|
||||||
|
rtShutdownRuntime();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_game.Init(&_launcher_api) != RT_SUCCESS) {
|
||||||
|
rtReportError("LAUNCHER", "Failed to initialize the renderer.");
|
||||||
|
if (_game_lib)
|
||||||
|
rtCloseLib(_game_lib);
|
||||||
|
glfwTerminate();
|
||||||
|
rtShutdownRuntime();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_time_delta time_per_update = 1.0 / (double)l_Framerate.f;
|
||||||
|
rt_timestamp previous = rtTimeNow();
|
||||||
|
rt_time_delta lag = time_per_update;
|
||||||
|
while (!glfwWindowShouldClose(window)) {
|
||||||
|
glfwPollEvents();
|
||||||
|
|
||||||
|
rt_timestamp current = rtTimeNow();
|
||||||
|
rt_time_delta elapsed = rtTimeBetween(previous, current);
|
||||||
|
previous = current;
|
||||||
|
lag += elapsed;
|
||||||
|
|
||||||
|
/* TODO: Process input */
|
||||||
|
|
||||||
|
while (lag >= time_per_update) {
|
||||||
|
_game.Update(&_launcher_api, time_per_update, _game_obj);
|
||||||
|
lag -= time_per_update;
|
||||||
|
}
|
||||||
|
|
||||||
|
int disp_w, disp_h;
|
||||||
|
glfwGetFramebufferSize(window, &disp_w, &disp_h);
|
||||||
|
_game.Render(&_launcher_api, _game_obj);
|
||||||
|
|
||||||
|
if (l_CreateGLContext.i) {
|
||||||
|
glfwSwapBuffers(window);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_game.Shutdown(&_launcher_api, _game_obj);
|
||||||
|
rtShutdownRenderer();
|
||||||
|
|
||||||
|
glfwDestroyWindow(window);
|
||||||
|
glfwTerminate();
|
||||||
|
if (_game_lib)
|
||||||
|
rtCloseLib(_game_lib);
|
||||||
|
rtShutdownRuntime();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR pCmdLine, int nCmdShow) {
|
||||||
|
_hInstance = hInstance;
|
||||||
|
|
||||||
|
/* Convert to UTF-8 argv array */
|
||||||
|
LPWSTR pWCmdLine = GetCommandLineW();
|
||||||
|
int argc = 0;
|
||||||
|
LPWSTR *pWArgv = CommandLineToArgvW(pWCmdLine, &argc);
|
||||||
|
/* Determine total amount of memory needed */
|
||||||
|
size_t mem_required = sizeof(char *) * argc; /* array of pointers */
|
||||||
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
mem_required += (size_t)WideCharToMultiByte(CP_UTF8, WC_COMPOSITECHECK, pWArgv[i], -1, NULL, 0, NULL, NULL);
|
||||||
|
}
|
||||||
|
void *argv_mem = VirtualAlloc(NULL, mem_required, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
|
||||||
|
char **argv = argv_mem;
|
||||||
|
char *arg = (char *)(argv + argc);
|
||||||
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
int len = WideCharToMultiByte(CP_UTF8, WC_COMPOSITECHECK, pWArgv[i], -1, NULL, 0, NULL, NULL);
|
||||||
|
WideCharToMultiByte(CP_UTF8, WC_COMPOSITECHECK, pWArgv[i], -1, arg, len, NULL, NULL);
|
||||||
|
argv[i] = arg;
|
||||||
|
arg += len;
|
||||||
|
}
|
||||||
|
LocalFree(pWArgv);
|
||||||
|
|
||||||
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
rtLog("LAUNCHER", "argv[%d]: %s", i, argv[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = Entry(argc, argv);
|
||||||
|
VirtualFree(argv_mem, 0, MEM_RELEASE);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
return Entry(argc, argv);
|
||||||
|
}
|
||||||
|
#endif
|
src/launcher/meson.build (new file, 12 lines)
@@ -0,0 +1,12 @@
+launcher_deps = [thread_dep, glfw_dep, m_dep]
+launcher_link_libs = [runtime_lib, renderer_lib]
+
+launcher_name = get_option('launcher_name')
+
+executable(launcher_name,
+           'launcher.c',
+           include_directories: engine_incdir,
+           dependencies: launcher_deps,
+           link_with: launcher_link_libs,
+           win_subsystem: 'windows')
+
@ -1,9 +1,9 @@
 subdir('runtime')
 subdir('asset_compiler')
-subdir('app_framework')

-subdir('renderer/common')
-subdir('renderer/dx11')
+subdir('renderer')

+subdir('launcher')

 if get_option('build_experiments')
     subdir('experimental')
59  src/renderer/backend_api.h  Normal file
@ -0,0 +1,59 @@
#ifndef RT_RENCOM_RENDERER_API_H
#define RT_RENCOM_RENDERER_API_H

#include <runtime/runtime.h>
#include <stdbool.h>

#include "renderer.h"
#include "render_resource.h"
#include "command_list.h"

typedef struct rt_physical_resource_manager_i rt_physical_resource_manager_i;

typedef rt_physical_resource_manager_i rt_render_device_get_physical_resource_manager_fn(void *o);
typedef rt_result rt_render_device_submit_command_list_fn(void *o, const rt_render_command_list *list);

/* Interface for the render device.
 * The device is responsible for executing command lists. */
typedef struct {
    void *o;
    rt_render_device_get_physical_resource_manager_fn *GetPhysicalResourceManager;
    rt_render_device_submit_command_list_fn *SubmitCommandList;
} rt_render_device_i;

typedef bool rt_physical_resource_manager_is_present_fn(void *o, rt_render_resource_handle h);
typedef void rt_physical_resource_manager_destroy_fn(void *o, rt_render_resource_handle h);
typedef rt_result rt_physical_resource_manager_create_buffer_fn(void *o, rt_render_resource_handle h, const rt_render_buffer_desc *desc);
typedef rt_result rt_physical_resource_manager_create_texture2d_fn(void *o, rt_render_resource_handle h, const rt_render_texture2d_desc *desc);

/* Interface for the physical resource manager.
 * The physical resource manager maps render resources to actual gpu memory (= api objects like VkImage).
 */
struct rt_physical_resource_manager_i {
    void *o;
    rt_physical_resource_manager_is_present_fn *IsPresent;
    rt_physical_resource_manager_destroy_fn *Destroy;
    rt_physical_resource_manager_create_buffer_fn *CreateBuffer;
    rt_physical_resource_manager_create_texture2d_fn *CreateTexture2D;
};

typedef struct {
    rt_result result;
    rt_render_device_i device;
} rt_render_backend_init_result;

typedef void rt_render_backend_register_cvars_fn(void);
typedef rt_render_backend_init_result rt_render_backend_init_fn(const rt_renderer_window_info *info);
typedef void rt_render_backend_shutdown_fn(void);

/* Public renderer interface */
typedef struct {
    rt_render_backend_register_cvars_fn *RegisterCVARs;
    rt_render_backend_init_fn *Init;
    rt_render_backend_shutdown_fn *Shutdown;
} rt_render_backend_api;

extern rt_render_backend_api g_render_backend;
extern rt_render_device_i g_device_i;

#endif
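A minimal sketch (not part of the diff) of what a backend satisfying rt_render_backend_api could look like: a do-nothing "null" backend. Everything except the types and the exported rtLoadRenderBackendImpl symbol (which load_stub.c looks up) is made up for illustration.

/* Hypothetical null backend: illustrates the shape of an rt_render_backend_api
 * implementation. All helper names are invented for this sketch. */
#include "backend_api.h"

static rt_physical_resource_manager_i NullGetPhysManager(void *o) {
    (void)o;
    rt_physical_resource_manager_i iface = {0}; /* no physical resources are created */
    return iface;
}

static rt_result NullSubmitCommandList(void *o, const rt_render_command_list *list) {
    (void)o;
    (void)list; /* a real backend would translate the encoded commands to API calls here */
    return RT_SUCCESS;
}

static void NullRegisterCVARs(void) {}

static rt_render_backend_init_result NullInit(const rt_renderer_window_info *info) {
    (void)info;
    rt_render_device_i device = {
        .o                          = NULL,
        .GetPhysicalResourceManager = NullGetPhysManager,
        .SubmitCommandList          = NullSubmitCommandList,
    };
    return (rt_render_backend_init_result){.result = RT_SUCCESS, .device = device};
}

static void NullShutdown(void) {}

/* Entry point resolved by the loader (see load_stub.c). */
RT_DLLEXPORT rt_render_backend_api rtLoadRenderBackendImpl(void) {
    return (rt_render_backend_api){
        .RegisterCVARs = NullRegisterCVARs,
        .Init          = NullInit,
        .Shutdown      = NullShutdown,
    };
}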
102  src/renderer/command_list.c  Normal file
@ -0,0 +1,102 @@
#include "command_list.h"
#include "backend_api.h"

#include <runtime/config.h>
#include <runtime/mem_arena.h>
#include <runtime/threading.h>
#include <string.h>

RT_CVAR_SZ(r_PerFrameCommandListMemory, "Amount of memory to allocate for single frame command list management."
           " The total amount of memory will be this times the maximum number of frames in flight. (Default: 16 MB)",
           RT_MB(16));

static rt_arena _list_arenas[3];
static rt_arena *_current_arena = &_list_arenas[0];
static unsigned int _current_arena_idx = 0;
static rt_mutex *_mutex;

rt_result InitCommandLists(void) {
    _mutex = rtCreateMutex();
    if (!_mutex)
        return RT_UNKNOWN_ERROR;

    for (unsigned int i = 0; i < RT_ARRAY_COUNT(_list_arenas); ++i) {
        rt_create_arena_result arena_res = rtCreateArena(NULL, r_PerFrameCommandListMemory.sz);
        if (!arena_res.ok) {
            rtDestroyMutex(_mutex);
            return RT_OUT_OF_MEMORY;
        }
        _list_arenas[i] = arena_res.arena;
    }
    return RT_SUCCESS;
}

void ShutdownCommandLists(void) {
    for (unsigned int i = 0; i < RT_ARRAY_COUNT(_list_arenas); ++i) {
        rtReleaseArena(&_list_arenas[i]);
    }
    rtDestroyMutex(_mutex);
}

void CommandListsOnBeginFrame(void) {
    _current_arena_idx = (_current_arena_idx + 1) % RT_ARRAY_COUNT(_list_arenas);
    _current_arena = &_list_arenas[_current_arena_idx];
    rtArenaClear(_current_arena);
}

#define COMMAND_LIST_CAPACITY RT_KB(512)
#define AVERAGE_COMMAND_DATA_SIZE sizeof(rt_draw_indirect_data)
#define COMMAND_LIST_MAX_LENGTH (COMMAND_LIST_CAPACITY / AVERAGE_COMMAND_DATA_SIZE)

/* Get a new render command list. */
RT_DLLEXPORT rt_begin_render_command_list_result rtBeginRenderCommandList(rt_render_queue queue) {
    size_t mem_required = COMMAND_LIST_MAX_LENGTH * sizeof(rt_render_command_header) + COMMAND_LIST_CAPACITY;
    rtLockMutex(_mutex);
    void *mem = rtArenaPush(_current_arena, mem_required);
    rtUnlockMutex(_mutex);
    if (!mem) {
        rtReportError("RENDERER", "Ran out of memory for command lists.");
        return (rt_begin_render_command_list_result){.result = RT_OUT_OF_MEMORY};
    }
    rt_render_command_list list = {
        .headers = mem,
        .target_queue = queue,
        .data = (void *)((rt_render_command_header *)mem + COMMAND_LIST_MAX_LENGTH),
        .length = 0u,
        .data_capacity = COMMAND_LIST_CAPACITY,
        .data_end = 0u
    };
    return (rt_begin_render_command_list_result){
        .result = RT_SUCCESS,
        .list = list
    };
}

/* Lowlevel function that writes the data to the queue. */
RT_DLLEXPORT rt_result rtEncodeRenderCommand(rt_render_command_list *list, rt_render_command_type type, const void *data) {
    size_t data_size = 0u;
    switch (type) {
    case RT_RENDER_COMMAND_DRAW_INDIRECT:
        data_size = sizeof(rt_draw_indirect_data);
        break;
    default:
        rtReportError("RENDERER", "Invalid render command type %u", type);
        return RT_INVALID_VALUE;
    }

    if (list->length == COMMAND_LIST_MAX_LENGTH || (list->data_end + data_size) > list->data_capacity) {
        rtReportError("RENDERER", "Reached maximum length or capacity of command list.");
        return RT_OUT_OF_MEMORY;
    }
    list->headers[list->length].type = type;
    ++list->length;
    char *dest = (char *)list->data + list->data_end;
    memcpy(dest, data, data_size);
    list->data_end += data_size;
    return RT_SUCCESS;
}

/* Submit a finished command list to the graphics device. */
RT_DLLEXPORT rt_result rtSubmitCommandList(const rt_render_command_list *list) {
    return g_device_i.SubmitCommandList(g_device_i.o, list);
}
90  src/renderer/command_list.h  Normal file
@ -0,0 +1,90 @@
#ifndef RT_RENDERER_COMMAND_LIST_H
#define RT_RENDERER_COMMAND_LIST_H

#include <stdint.h>
#include <runtime/runtime.h>
#include "render_resource.h"

/* Types of render commands */
typedef enum {
    RT_RENDER_COMMAND_DRAW_INDIRECT,

    RT_RENDER_COMMAND_COUNT,
} rt_render_command_type;

typedef enum {
    RT_RENDER_QUEUE_GRAPHICS,
    RT_RENDER_QUEUE_COMPUTE,
    RT_RENDER_QUEUE_TRANSFER,
} rt_render_queue;

/* Structures containing command parameters */

typedef struct {
    rt_render_buffer_handle buffer;
    uint32_t offset;
    uint32_t draw_count;
    uint32_t stride;
} rt_draw_indirect_data;

typedef struct {
    uint32_t type;
} rt_render_command_header;

typedef struct {
    /* One header per command */
    rt_render_command_header *headers;

    /* Contains the command data */
    void *data;

    /* Number of encoded commands */
    uint32_t length;

    /* The device queue to which this list should be submitted */
    uint32_t target_queue;

    /* Information used while encoding.
     * In the future, it would be possible to move this to another struct that is discarded after
     * encoding is finished.
     */
    size_t data_capacity;
    size_t data_end;
} rt_render_command_list;

#ifdef __cplusplus
extern "C" {
#endif

/* *** Encoding API *** */

typedef struct {
    rt_result result;
    rt_render_command_list list;
} rt_begin_render_command_list_result;

/* Get a new render command list. */
RT_DLLEXPORT rt_begin_render_command_list_result rtBeginRenderCommandList(rt_render_queue queue);

/* Lowlevel function that writes the data to the queue. */
RT_DLLEXPORT rt_result rtEncodeRenderCommand(rt_render_command_list *list, rt_render_command_type type, const void *data);

/* Helper functions for specific commands */
RT_INLINE rt_result rtEncodeDrawIndirect(rt_render_command_list *list, const rt_draw_indirect_data *draw_indirect_data) {
    return rtEncodeRenderCommand(list, RT_RENDER_COMMAND_DRAW_INDIRECT, draw_indirect_data);
}

/* *** Submission *** */

/* Submit a finished command list to the graphics device. */
RT_DLLEXPORT rt_result rtSubmitCommandList(const rt_render_command_list *list);

#ifdef __cplusplus
}
#endif

#endif
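A short usage sketch (not part of the diff) of the command list API declared above: begin a list, encode one indirect draw, submit. The indirect_args handle and the field values are illustrative placeholders.

/* Sketch: record and submit a single draw-indirect command per frame.
 * The buffer handle is assumed to have been created elsewhere. */
static void SubmitExampleDraw(rt_render_buffer_handle indirect_args) {
    rt_begin_render_command_list_result begin = rtBeginRenderCommandList(RT_RENDER_QUEUE_GRAPHICS);
    if (begin.result != RT_SUCCESS)
        return;

    rt_draw_indirect_data draw = {
        .buffer     = indirect_args, /* GPU buffer holding the indirect arguments */
        .offset     = 0,
        .draw_count = 1,
        .stride     = 0,             /* tightly packed arguments (placeholder) */
    };
    if (rtEncodeDrawIndirect(&begin.list, &draw) != RT_SUCCESS)
        return;

    rtSubmitCommandList(&begin.list); /* forwarded to the backend via g_device_i */
}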
@ -1,46 +0,0 @@
#include "renderer_api.h"
#include "runtime/dynamic_libs.h"
#include "runtime/config.h"

#include <string.h>

static RT_CVAR_S(rt_Renderer, "The used renderer. Available options: dx11.", "dx11");

RT_DLLEXPORT rt_renderer_api g_renderer;

typedef rt_renderer_api rt_load_renderer_impl_fn(void);

#ifdef RT_STATIC_LIB
extern rt_renderer_api rtLoadRendererImpl(void);
#else
static rt_dynlib _renderer_lib;
#endif

RT_DLLEXPORT rt_result rtLoadRenderer(void) {
    rt_load_renderer_impl_fn *LoadRendererImpl = NULL;
    if (!rtGetCVAR("rt_Renderer"))
        rtRegisterCVAR(&rt_Renderer);
#ifdef RT_STATIC_LIB
    LoadRendererImpl = rtLoadRendererImpl;
#else
    const char *dllname = NULL;
    if (strcmp(rt_Renderer.s, "dx11") == 0)
        dllname = RT_DLLNAME("rtdx11");
    else {
        rtReportError("RENDERER", "Invalid renderer selected: %s", rt_Renderer.s);
        return RT_INVALID_VALUE;
    }
    _renderer_lib = rtOpenLib(dllname);
    if (!_renderer_lib) {
        rtReportError("RENDERER", "Unable to load renderer backend: %s", dllname);
        return RT_UNKNOWN_ERROR;
    }
    LoadRendererImpl = (rt_load_renderer_impl_fn *)rtGetSymbol(_renderer_lib, "rtLoadRendererImpl");
    if (!LoadRendererImpl) {
        rtReportError("RENDERER", "Invalid renderer. Could not find rtLoadRendererImpl");
        return RT_UNKNOWN_ERROR;
    }
#endif
    g_renderer = LoadRendererImpl();
    return RT_SUCCESS;
}
@ -1,10 +0,0 @@
common_renderer_lib = static_library('common_renderer',
    '../common/renderer_api.h',
    '../common/render_mesh.h',

    '../common/meshlet_pools.c',
    '../common/render_mesh.c',
    dependencies: [m_dep, thread_dep],
    link_with: runtime_lib,
    include_directories: [engine_incdir, contrib_incdir],
    install: false)
@ -10,7 +10,7 @@ rt_dx11_device::rt_dx11_device() : m_is_initialized(false) {
 rt_dx11_device::~rt_dx11_device() {
 }
 
-rt_result rt_dx11_device::Initialize(const rt_renderer_init_info *info) {
+rt_result rt_dx11_device::Initialize(const rt_renderer_window_info *info) {
     // Create the necessary objects (Device, SwapChain, Immediate Context)
     HRESULT hr = S_OK;
     ComPtr<IDXGIFactory1> factory;
@ -5,7 +5,7 @@
 #error This file must only be used from C++ code
 #endif
 
-#include "renderer/common/renderer_api.h"
+#include "renderer/backend_api.h"
 #include "runtime/runtime.h"
 
 #include <d3d11_4.h>
@ -22,7 +22,7 @@ class rt_dx11_device {
         return &dev;
     }
 
-    rt_result Initialize(const rt_renderer_init_info *info);
+    rt_result Initialize(const rt_renderer_window_info *info);
     void Shutdown(void);
 
   private:
@ -1,27 +1,30 @@
-#include "renderer/common/render_mesh.h"
-#include "renderer/common/renderer_api.h"
+#include "renderer/backend_api.h"
 
 #include "device.hpp"
 
-rt_result Dx11Init(const rt_renderer_init_info *info) {
-    rt_result res = rt_dx11_device::GetInstance()->Initialize(info);
-    if (res != RT_SUCCESS)
-        return res;
-
-    res = rtInitMeshStreaming();
-    return res;
+void Dx11RegisterCVARs(void) {
+}
+
+rt_render_backend_init_result Dx11Init(const rt_renderer_window_info *info) {
+    rt_result res = rt_dx11_device::GetInstance()->Initialize(info);
+
+    rt_render_device_i iface = {reinterpret_cast<void *>(rt_dx11_device::GetInstance()), nullptr};
+    rt_render_backend_init_result result = {.result = res, .device = iface};
+    if (res != RT_SUCCESS)
+        return result;
+    return result;
 }
 
 void Dx11Shutdown(void) {
-    rtShutdownMeshStreaming();
     rt_dx11_device::GetInstance()->Shutdown();
 }
 
 // Called by the application to retrieve the renderer api
-extern "C" RT_DLLEXPORT rt_renderer_api rtLoadRendererImpl(void) {
-    rt_renderer_api api = {
-        .Init = Dx11Init,
+extern "C" RT_DLLEXPORT rt_render_backend_api rtLoadRenderBackendImpl(void) {
+    rt_render_backend_api api = {
+        .RegisterCVARs = Dx11RegisterCVARs,
+        .Init = Dx11Init,
         .Shutdown = Dx11Shutdown,
     };
     return api;
 }
@ -9,7 +9,7 @@ if get_option('build_dx11')
     dependencies: [m_dep, dx11_dep, thread_dep],
     include_directories: [engine_incdir, contrib_incdir],
-    link_with: [runtime_lib, common_renderer_lib],
+    link_with: [runtime_lib],
     cpp_pch: 'pch/dx11_pch.h',
     override_options: ['b_sanitize=none'],
     install: true)
49  src/renderer/init.c  Normal file
@ -0,0 +1,49 @@
#include "renderer.h"
#include "backend_api.h"

#include <runtime/runtime.h>
#include <runtime/config.h>

extern rt_cvar r_MaxRenderResources;
extern rt_cvar r_PerFrameCommandListMemory;

RT_DLLEXPORT void rtRegisterRenderCVARs(void) {
    rtRegisterCVAR(&r_MaxRenderResources);
    rtRegisterCVAR(&r_PerFrameCommandListMemory);
}

RT_DLLEXPORT void rtRegisterRenderBackendCVARs(void) {
    g_render_backend.RegisterCVARs();
}

rt_render_device_i g_device_i;

extern rt_result InitVirtualResourceRegistry(void);
extern void ShutdownVirtualResourceRegistry(void);
extern rt_result InitCommandLists(void);
extern void ShutdownCommandLists(void);

RT_DLLEXPORT rt_result rtInitRenderer(const rt_renderer_window_info *info) {
    rt_render_backend_init_result backend_res = g_render_backend.Init(info);
    if (backend_res.result != RT_SUCCESS)
        return backend_res.result;
    g_device_i = backend_res.device;

    rt_result res;
    if ((res = InitVirtualResourceRegistry()) != RT_SUCCESS) {
        g_render_backend.Shutdown();
        return res;
    }
    if ((res = InitCommandLists()) != RT_SUCCESS) {
        ShutdownVirtualResourceRegistry();
        g_render_backend.Shutdown();
        return res;
    }
    return res;
}

RT_DLLEXPORT void rtShutdownRenderer(void) {
    ShutdownCommandLists();
    ShutdownVirtualResourceRegistry();
    g_render_backend.Shutdown();
}
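A sketch (not part of the diff) of the startup order these entry points imply: load the backend, register cvars, then initialize the renderer. The window-info values are placeholders; platform handles are omitted and assumed to be filled in by the launcher.

/* Assumed call order, based on renderer.h and init.c above. */
static rt_result StartRenderer(void) {
    rt_result res = rtLoadRenderBackend();   /* resolves rtLoadRenderBackendImpl */
    if (res != RT_SUCCESS)
        return res;

    rtRegisterRenderCVARs();                 /* renderer-level cvars */
    rtRegisterRenderBackendCVARs();          /* backend-specific cvars */

    rt_renderer_window_info info = {
        .width         = 1024,               /* placeholder values */
        .height        = 768,
        .is_fullscreen = 0,
    };
    return rtInitRenderer(&info);            /* backend + registries + command lists */
}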
58  src/renderer/load_stub.c  Normal file
@ -0,0 +1,58 @@
#include "backend_api.h"
#include "runtime/dynamic_libs.h"
#include "runtime/config.h"
#include "runtime/runtime.h"

#include <string.h>


RT_DLLEXPORT rt_render_backend_api g_render_backend;

typedef rt_render_backend_api rt_load_renderer_impl_fn(void);

#ifdef RT_STATIC_LIB
extern rt_render_backend_api rtLoadRenderBackendImpl(void);
#else
static rt_dynlib _renderer_lib;
#endif

#define DEFAULT_RENDERER "vk"

RT_DLLEXPORT rt_result rtLoadRenderBackend(void) {
    rt_load_renderer_impl_fn *LoadRenderBackendImpl = NULL;
    rt_cvar *l_Renderer = rtGetCVAR("l_Renderer");
    const char *renderer = DEFAULT_RENDERER;
    if (!l_Renderer) {
        rtReportError("RENDERER", "l_Renderer CVAR is not registered. Falling back to '%s'.", DEFAULT_RENDERER);
    } else {
        renderer = l_Renderer->s;
    }

#ifdef RT_STATIC_LIB
    RT_UNUSED((void *)renderer);
    LoadRenderBackendImpl = rtLoadRenderBackendImpl;
#else
    const char *dllname = NULL;
    if (strcmp(renderer, "dx11") == 0)
        dllname = RT_DLLNAME("rtdx11");
    else if (strcmp(renderer, "vk") == 0)
        dllname = RT_DLLNAME("rtvk");
    else {
        rtReportError("RENDERER", "Invalid renderer selected: %s", renderer);
        return RT_INVALID_VALUE;
    }
    _renderer_lib = rtOpenLib(dllname);
    if (!_renderer_lib) {
        rtReportError("RENDERER", "Unable to load renderer backend: %s", dllname);
        return RT_UNKNOWN_ERROR;
    }
    LoadRenderBackendImpl = (rt_load_renderer_impl_fn *)rtGetSymbol(_renderer_lib, "rtLoadRenderBackendImpl");
    if (!LoadRenderBackendImpl) {
        rtReportError("RENDERER", "Invalid renderer. Could not find rtLoadRenderBackendImpl");
        return RT_UNKNOWN_ERROR;
    }
#endif
    g_render_backend = LoadRenderBackendImpl();
    return RT_SUCCESS;
}
36  src/renderer/meson.build  Normal file
@ -0,0 +1,36 @@
# Add backends here
subdir('dx11')
subdir('vk')

renderer_link_libs = [runtime_lib]

if get_option('default_library') == 'static'
    if get_option('static_renderer') == 'vk'
        renderer_link_libs += vk_renderer_lib
    elif get_option('static_renderer') == 'dx11'
        renderer_link_libs += dx11_renderer_lib
    else
        error('Invalid static_renderer option ', get_option('static_renderer'))
    endif
endif


renderer_lib = library('renderer',
    'backend_api.h',
    'command_list.h',
    'renderer.h',
    'render_mesh.h',
    'render_resource.h',
    'virtual_resource_registry.h',

    'command_list.c',
    'init.c',
    'load_stub.c',
    'meshlet_pools.c',
    'render_mesh.c',
    'virtual_resource_registry.c',
    dependencies: [m_dep, thread_dep],
    link_with: renderer_link_libs,
    include_directories: engine_incdir,
    install: true)
131  src/renderer/render_resource.h  Normal file
@ -0,0 +1,131 @@
#ifndef RT_RENDERER_RENDER_RESOURCE_H
#define RT_RENDERER_RENDER_RESOURCE_H

#include <stdint.h>
#include <runtime/runtime.h>
#include <runtime/resources.h>

typedef enum {
    RT_RENDER_RESOURCE_TYPE_INVALID,
    RT_RENDER_RESOURCE_TYPE_BUFFER,
    RT_RENDER_RESOURCE_TYPE_TEXTURE2D,
    /* Max is 2**6 = 64 */
} rt_render_resource_type;

/* Handle to a render resource.
 * The layout is:
 * | type : 6 | version : 6 | index : 20 |
 * MSB                                LSB
 */
typedef struct {
    uint32_t value;
} rt_render_resource_handle;

/* Aliases for render_resource_handle to be able to express the type of expected resources in code. */
typedef rt_render_resource_handle rt_render_buffer_handle;
typedef rt_render_resource_handle rt_render_texture2d_handle;

#define RT_RENDER_RESOURCE_MAX_VERSION 0x3f

/* Extract the type part of a render resource handle */
static RT_INLINE rt_render_resource_type rtGetRenderResourceHandleType(rt_render_resource_handle h) {
    return (rt_render_resource_type)((h.value >> 26) & 0x3f);
}

/* Extract the version part of a render resource handle */
static RT_INLINE uint32_t rtGetRenderResourceHandleVersion(rt_render_resource_handle h) {
    return (h.value >> 20) & 0x3f;
}

/* Extract the index part of a render resource handle */
static RT_INLINE uint32_t rtGetRenderResourceHandleIndex(rt_render_resource_handle h) {
    return h.value & 0xfffff;
}

/* Create a render resource handle. This only does the required bit-shifting, it does not actually register any resource for the handle. */
static RT_INLINE rt_render_resource_handle rtMakeRenderResourceHandle(rt_render_resource_type type, uint32_t version, uint32_t index) {
    rt_render_resource_handle h;
    h.value = ((type & 0x3f) << 26u) | ((version & 0x3f) << 20u) | (index & 0xfffff);
    return h;
}

/* Resource description structs */

typedef enum {
    RT_RENDER_BUFFER_USAGE_NONE = 0,
    RT_RENDER_BUFFER_USAGE_VERTEX_BUFFER = 0x01,
    RT_RENDER_BUFFER_USAGE_INDEX_BUFFER = 0x02,
    RT_RENDER_BUFFER_USAGE_STORAGE_BUFFER = 0x04,

    /* The buffer will be used as a source to populate other resources
     * with data */
    RT_RENDER_BUFFER_USAGE_UPLOAD_BUFFER = 0x10,
} rt_render_buffer_usage_flags;

typedef enum {
    /* The buffer can reside in memory that can only be accessed by the GPU */
    RT_RENDER_BUFFER_ACCESS_GPU_ONLY = 0x01,

    /* The buffer needs to be in CPU accessible memory */
    RT_RENDER_BUFFER_ACCESS_CPU_AND_GPU = 0x02,

    /* The buffer is short-lived (will be destroyed at the end of the frame) */
    RT_RENDER_BUFFER_ACCESS_TRANSIENT = 0x04,
} rt_render_buffer_access_flags;

/* Describes a gpu buffer */
typedef struct {
    /* The required size in bytes */
    size_t size;

    /* Bitmask describing the usage of the buffer */
    rt_render_buffer_usage_flags usage;

    /* Bitmask describing the access the buffer needs to support */
    rt_render_buffer_access_flags access;

    /* ResourceID of the resource that will be used to populate this buffer. */
    rt_resource_id source_resource;
} rt_render_buffer_desc;

typedef enum {
    RT_TEXTURE_FORMAT_B8G8R8A8_SRGB,

    RT_TEXTURE_FORMAT_MAX,
} rt_texture_format;

typedef enum {
    RT_TEXTURE_USAGE_NONE = 0x00,
    RT_TEXTURE_USAGE_COLOR_ATTACHMENT = 0x01,
    RT_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT = 0x02,
    RT_TEXTURE_USAGE_SAMPLED_IMAGE = 0x04,
    RT_TEXTURE_USAGE_STORAGE_IMAGE = 0x10,
} rt_texture_usage_flags;

/* Describes a gpu texture */
typedef struct {
    /* Width in pixels */
    uint32_t width;

    /* Height in pixels */
    uint32_t height;

    /* Pixel format */
    rt_texture_format format;

    /* Number of samples */
    uint32_t samples;

    /* Number of mip levels */
    uint32_t mip_levels;

    /* Bitmask of usages this texture needs to support */
    rt_texture_usage_flags usage;

    /* ResourceID of the resource that will be used to populate this texture. */
    rt_resource_id source_resource;
} rt_render_texture2d_desc;

#endif
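A worked example (not part of the diff) of the handle bit layout described above, using only the inline helpers from render_resource.h. With type = RT_RENDER_RESOURCE_TYPE_BUFFER (1), version = 3 and index = 42, the packed value is (1 << 26) | (3 << 20) | 42 = 0x0430002a, and unpacking recovers each field.

#include <assert.h>

static void HandlePackingExample(void) {
    rt_render_resource_handle h =
        rtMakeRenderResourceHandle(RT_RENDER_RESOURCE_TYPE_BUFFER, 3, 42);
    /* 0x04000000 (type) | 0x00300000 (version) | 0x0000002a (index) */
    assert(h.value == 0x0430002au);
    assert(rtGetRenderResourceHandleType(h) == RT_RENDER_RESOURCE_TYPE_BUFFER);
    assert(rtGetRenderResourceHandleVersion(h) == 3);
    assert(rtGetRenderResourceHandleIndex(h) == 42);
}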
@ -1,10 +1,8 @@
-#ifndef RT_RENCOM_RENDERER_API_H
-#define RT_RENCOM_RENDERER_API_H
+#ifndef RT_RENDERER_H
+#define RT_RENDERER_H
 
 #include "runtime/runtime.h"
 
-/* Defines the outward facing renderer api */
-
 #ifdef _WIN32
 struct HINSTANCE__;
 struct HWND__;
@ -25,10 +23,7 @@ typedef struct {
     unsigned int width;
     unsigned int height;
     int is_fullscreen;
-} rt_renderer_init_info;
+} rt_renderer_window_info;
 
-typedef rt_result rt_renderer_init_fn(const rt_renderer_init_info *info);
-typedef void rt_renderer_shutdown_fn(void);
-
 typedef enum {
     RT_SHADER_STAGE_VERTEX,
@ -36,17 +31,24 @@ typedef enum {
     RT_SHADER_STAGE_COMPUTE,
 } rt_shader_stage;
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
-/* Public renderer interface */
-typedef struct {
-    rt_renderer_init_fn *Init;
-    rt_renderer_shutdown_fn *Shutdown;
-} rt_renderer_api;
+RT_DLLEXPORT void rtRegisterRenderCVARs(void);
 
-/* Global renderer object */
-RT_DLLIMPORT extern rt_renderer_api g_renderer;
+/* Loads the requested render backend */
+RT_DLLEXPORT rt_result rtLoadRenderBackend(void);
 
-RT_DLLEXPORT rt_result rtLoadRenderer(void);
+RT_DLLEXPORT void rtRegisterRenderBackendCVARs(void);
+
+RT_DLLEXPORT rt_result rtInitRenderer(const rt_renderer_window_info *window);
+
+RT_DLLEXPORT void rtShutdownRenderer(void);
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif
138  src/renderer/virtual_resource_registry.c  Normal file
@ -0,0 +1,138 @@
#include <runtime/config.h>
#include <runtime/threading.h>
#include <runtime/runtime.h>
#include <stdlib.h>
#include <string.h>


#include "virtual_resource_registry.h"
#include "render_resource.h"

RT_CVAR_SZ(r_MaxRenderResources, "Maximum number of render resources that can exist simultaneously. (Default: 16384)", 16384);

typedef struct {
    rt_render_resource_handle *handles;
    struct {
        union {
            /* For management */
            uint32_t next_free;

            rt_render_buffer_desc buffer;
            rt_render_texture2d_desc texture2d;
        };
    } *descs;
    uint32_t first_free;
    rt_rwlock lock;
} rt_description_table;

static rt_description_table _description_tab;

rt_result InitVirtualResourceRegistry(void) {
    _description_tab.handles = calloc(r_MaxRenderResources.sz, sizeof(rt_render_resource_handle));
    if (!_description_tab.handles)
        return RT_OUT_OF_MEMORY;
    _description_tab.descs = calloc(r_MaxRenderResources.sz, sizeof(*_description_tab.descs));
    if (!_description_tab.descs) {
        free(_description_tab.handles);
        return RT_OUT_OF_MEMORY;
    }
    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok) {
        free(_description_tab.handles);
        free(_description_tab.descs);
        return RT_UNKNOWN_ERROR;
    }
    _description_tab.lock = lock_res.lock;
    _description_tab.first_free = 0;
    for (uint32_t i = 0; i < r_MaxRenderResources.sz; ++i) {
        _description_tab.descs[i].next_free = i + 1;
    }
    return RT_SUCCESS;
}

void ShutdownVirtualResourceRegistry(void) {
    free(_description_tab.handles);
    free(_description_tab.descs);
    rtDestroyRWLock(&_description_tab.lock);
}

/* Returns true if the handle refers to an existing resource. */
bool rtIsRenderResourceKnown(rt_render_resource_handle h) {
    uint32_t idx = rtGetRenderResourceHandleIndex(h);
    if (idx >= r_MaxRenderResources.sz)
        return false;
    rtLockRead(&_description_tab.lock);
    bool is_known = _description_tab.handles[idx].value == h.value;
    rtUnlockRead(&_description_tab.lock);
    return is_known;
}

static const void *GetResourceDescription(rt_render_resource_handle h) {
    rt_render_resource_type type = rtGetRenderResourceHandleType(h);
    uint32_t idx = rtGetRenderResourceHandleIndex(h);
    if (idx >= r_MaxRenderResources.sz)
        return NULL;
    const void *desc = NULL;
    rtLockRead(&_description_tab.lock);
    if (_description_tab.handles[idx].value == h.value) {
        switch (type) {
        case RT_RENDER_RESOURCE_TYPE_BUFFER:
            desc = &_description_tab.descs[idx].buffer;
            break;
        case RT_RENDER_RESOURCE_TYPE_TEXTURE2D:
            desc = &_description_tab.descs[idx].texture2d;
            break;
        default:
            desc = NULL;
            break;
        }
    }
    rtUnlockRead(&_description_tab.lock);
    return desc;
}

/* Returns a pointer to the resource description or NULL, if the handle does not refer to a valid buffer */
const rt_render_buffer_desc *rtGetRenderBufferDescription(rt_render_resource_handle h) {
    return GetResourceDescription(h);
}

/* Returns a pointer to the resource description or NULL, if the handle does not refer to a valid texture2d */
const rt_render_texture2d_desc *rtGetRenderTexture2DDescription(rt_render_resource_handle h) {
    return GetResourceDescription(h);
}

static rt_render_resource_handle AllocSlot(rt_render_resource_type type, const void *desc) {
    rt_render_resource_handle h = rtMakeRenderResourceHandle(RT_RENDER_RESOURCE_TYPE_INVALID, 0, 0);
    rtLockWrite(&_description_tab.lock);
    uint32_t slot = _description_tab.first_free;
    if (slot < r_MaxRenderResources.sz) {
        _description_tab.first_free = _description_tab.descs[slot].next_free;
        uint32_t current_version = rtGetRenderResourceHandleVersion(_description_tab.handles[slot]);
        uint32_t next_version = (current_version + 1) & RT_RENDER_RESOURCE_MAX_VERSION;
        h = rtMakeRenderResourceHandle(type, next_version, slot);
        _description_tab.handles[slot] = h;
        switch (type) {
        case RT_RENDER_RESOURCE_TYPE_BUFFER:
            memcpy(&_description_tab.descs[slot].buffer, desc, sizeof(rt_render_buffer_desc));
            break;
        case RT_RENDER_RESOURCE_TYPE_TEXTURE2D:
            memcpy(&_description_tab.descs[slot].texture2d, desc, sizeof(rt_render_texture2d_desc));
            break;
        default:
            rtReportError("RENDERER", "Tried to create a resource with invalid type %u", type);
            break;
        }
    }
    rtUnlockWrite(&_description_tab.lock);
    return h;
}

/* Stores the render buffer description and returns a handle to the new resource. */
rt_render_resource_handle rtCreateRenderBuffer(const rt_render_buffer_desc *desc) {
    return AllocSlot(RT_RENDER_RESOURCE_TYPE_BUFFER, desc);
}

/* Stores the texture2d description and returns a handle to the new resource. */
rt_render_resource_handle rtCreateRenderTexture2D(const rt_render_texture2d_desc *desc) {
    return AllocSlot(RT_RENDER_RESOURCE_TYPE_TEXTURE2D, desc);
}
33  src/renderer/virtual_resource_registry.h  Normal file
@ -0,0 +1,33 @@
#ifndef RT_RENDERER_VIRTUAL_RESOURCE_REGISTRY_H
#define RT_RENDERER_VIRTUAL_RESOURCE_REGISTRY_H

#include <stdbool.h>
#include <runtime/runtime.h>
#include "render_resource.h"


#ifdef __cplusplus
extern "C" {
#endif

/* Returns true if the handle refers to an existing resource. */
bool rtIsRenderResourceKnown(rt_render_resource_handle h);

/* Returns a pointer to the resource description or NULL, if the handle does not refer to a valid buffer */
const rt_render_buffer_desc *rtGetRenderBufferDescription(rt_render_resource_handle h);

/* Returns a pointer to the resource description or NULL, if the handle does not refer to a valid texture2d */
const rt_render_texture2d_desc *rtGetRenderTexture2DDescription(rt_render_resource_handle h);

/* Stores the render buffer description and returns a handle to the new resource. */
rt_render_resource_handle rtCreateRenderBuffer(const rt_render_buffer_desc *desc);

/* Stores the texture2d description and returns a handle to the new resource. */
rt_render_resource_handle rtCreateRenderTexture2D(const rt_render_texture2d_desc *desc);

#ifdef __cplusplus
}
#endif

#endif
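A minimal usage sketch (not part of the diff) of the virtual resource registry: register a buffer description and read it back through the handle. The size, flags and the zero source resource are arbitrary illustration values.

static void VirtualBufferExample(void) {
    rt_render_buffer_desc desc = {
        .size            = 64 * 1024,
        .usage           = RT_RENDER_BUFFER_USAGE_STORAGE_BUFFER,
        .access          = RT_RENDER_BUFFER_ACCESS_GPU_ONLY,
        .source_resource = 0, /* no source asset in this example */
    };
    rt_render_buffer_handle buffer = rtCreateRenderBuffer(&desc);
    if (!rtIsRenderResourceKnown(buffer))
        return; /* registry was full */

    /* The stored description can be retrieved again from the handle. */
    const rt_render_buffer_desc *stored = rtGetRenderBufferDescription(buffer);
    RT_ASSERT(stored && stored->size == desc.size, "Description round-trip failed");
}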
481
src/renderer/vk/bindless_registry.c
Normal file
481
src/renderer/vk/bindless_registry.c
Normal file
@ -0,0 +1,481 @@
|
|||||||
|
#include "bindless_registry.h"
|
||||||
|
#include "device.h"
|
||||||
|
|
||||||
|
#include <runtime/config.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
RT_CVAR_UI(r_VkBindlessUniformBufferDescriptors,
|
||||||
|
"Number of uniform buffer descriptors to create. (Default: 1024)",
|
||||||
|
1024);
|
||||||
|
RT_CVAR_UI(r_VkBindlessStorageBufferDescriptors,
|
||||||
|
"Number of storage buffer descriptors to create. (Default: 1024)",
|
||||||
|
1024);
|
||||||
|
RT_CVAR_UI(r_VkBindlessSampledImageDescriptors,
|
||||||
|
"Number of sampled image descriptors to create. (Default: 1024)",
|
||||||
|
1024);
|
||||||
|
RT_CVAR_UI(r_VkBindlessStorageImageDescriptors,
|
||||||
|
"Number of storage image descriptors to create. (Default: 1024)",
|
||||||
|
1024);
|
||||||
|
RT_CVAR_UI(r_VkBindlessSamplerDescriptors,
|
||||||
|
"Number of sampler descriptors to create. (Default: 128)",
|
||||||
|
128);
|
||||||
|
|
||||||
|
static size_t ReleaseListCapacity(void) {
|
||||||
|
return (size_t)r_VkBindlessUniformBufferDescriptors.ui +
|
||||||
|
(size_t)r_VkBindlessStorageBufferDescriptors.ui +
|
||||||
|
(size_t)r_VkBindlessSampledImageDescriptors.ui +
|
||||||
|
(size_t)r_VkBindlessStorageBufferDescriptors.ui +
|
||||||
|
(size_t)r_VkBindlessSamplerDescriptors.ui;
|
||||||
|
}
|
||||||
|
|
||||||
|
static rt_vk_bindless_reuse_stack AllocReuseStack(uint32_t max_descriptors) {
|
||||||
|
rt_vk_bindless_reuse_stack stack;
|
||||||
|
stack.indices = calloc(max_descriptors, sizeof(uint32_t));
|
||||||
|
stack.size = 0;
|
||||||
|
return stack;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_create_vk_bindless_registry_result rtCreateVkBindlessRegistry(rt_vk_device *dev) {
|
||||||
|
/* Check that required features are supported */
|
||||||
|
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES};
|
||||||
|
VkPhysicalDeviceFeatures2 features = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
|
||||||
|
.pNext = &indexing_features};
|
||||||
|
vkGetPhysicalDeviceFeatures2(dev->phys_device, &features);
|
||||||
|
|
||||||
|
if (!indexing_features.shaderSampledImageArrayNonUniformIndexing ||
|
||||||
|
!indexing_features.descriptorBindingSampledImageUpdateAfterBind ||
|
||||||
|
!indexing_features.shaderUniformBufferArrayNonUniformIndexing ||
|
||||||
|
!indexing_features.descriptorBindingUniformBufferUpdateAfterBind ||
|
||||||
|
!indexing_features.shaderStorageBufferArrayNonUniformIndexing ||
|
||||||
|
!indexing_features.descriptorBindingStorageBufferUpdateAfterBind ||
|
||||||
|
!indexing_features.shaderStorageImageArrayNonUniformIndexing ||
|
||||||
|
!indexing_features.descriptorBindingStorageImageUpdateAfterBind) {
|
||||||
|
|
||||||
|
#define TO_STRING(_f) ((_f) ? "SUPPORTED" : "NOT SUPPORTED")
|
||||||
|
rtReportError("VK",
|
||||||
|
"Required bindless features are not supported:\n"
|
||||||
|
" shaderSampledImageArrayNonUniformIndexing: %s\n"
|
||||||
|
" descriptorBindingSampledImageUpdateAfterBind: %s\n"
|
||||||
|
" shaderUniformBufferArrayNonUniformIndexing: %s\n"
|
||||||
|
" descriptorBindingUniformBufferUpdateAfterBind: %s\n"
|
||||||
|
" shaderStorageBufferArrayNonUniformIndexing: %s\n"
|
||||||
|
" descriptorBindingStorageBufferUpdateAfterBind: %s\n"
|
||||||
|
" shaderStorageImageArrayNonUniformIndexing: %s\n"
|
||||||
|
" descriptorBindingStorageImageUpdateAfterBind: %s",
|
||||||
|
TO_STRING(indexing_features.shaderSampledImageArrayNonUniformIndexing),
|
||||||
|
TO_STRING(indexing_features.descriptorBindingSampledImageUpdateAfterBind),
|
||||||
|
TO_STRING(indexing_features.shaderUniformBufferArrayNonUniformIndexing),
|
||||||
|
TO_STRING(indexing_features.descriptorBindingUniformBufferUpdateAfterBind),
|
||||||
|
TO_STRING(indexing_features.shaderStorageBufferArrayNonUniformIndexing),
|
||||||
|
TO_STRING(indexing_features.descriptorBindingStorageBufferUpdateAfterBind),
|
||||||
|
TO_STRING(indexing_features.shaderStorageImageArrayNonUniformIndexing),
|
||||||
|
TO_STRING(indexing_features.descriptorBindingStorageImageUpdateAfterBind));
|
||||||
|
|
||||||
|
/* NOTE(kevin): In the future we may fall back on a non-bindless renderer. But
|
||||||
|
* for now, we just error out */
|
||||||
|
return (rt_create_vk_bindless_registry_result){.result = RT_NOT_SUPPORTED};
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_vk_bindless_registry bindless_registry = {.dev = dev,
|
||||||
|
.uniform_buffer_binding = 0,
|
||||||
|
.storage_buffer_binding = 1,
|
||||||
|
.sampled_image_binding = 2,
|
||||||
|
.storage_image_binding = 3,
|
||||||
|
.sampler_binding = 4};
|
||||||
|
|
||||||
|
/* Create the descriptor set layout */
|
||||||
|
{
|
||||||
|
VkDescriptorSetLayoutBinding bindings[5];
|
||||||
|
VkDescriptorBindingFlags flags[5];
|
||||||
|
VkDescriptorType types[5] = {
|
||||||
|
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||||
|
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||||
|
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
VK_DESCRIPTOR_TYPE_SAMPLER,
|
||||||
|
};
|
||||||
|
uint32_t counts[5] = {
|
||||||
|
r_VkBindlessUniformBufferDescriptors.ui,
|
||||||
|
r_VkBindlessStorageBufferDescriptors.ui,
|
||||||
|
r_VkBindlessSampledImageDescriptors.ui,
|
||||||
|
r_VkBindlessStorageImageDescriptors.ui,
|
||||||
|
r_VkBindlessSamplerDescriptors.ui,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = 0; i < 5; ++i) {
|
||||||
|
bindings[i].binding = i;
|
||||||
|
bindings[i].descriptorType = types[i];
|
||||||
|
bindings[i].descriptorCount = counts[i];
|
||||||
|
bindings[i].stageFlags = VK_SHADER_STAGE_ALL, bindings[i].pImmutableSamplers = NULL;
|
||||||
|
flags[i] = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT |
|
||||||
|
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkDescriptorSetLayoutBindingFlagsCreateInfo binding_flags = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO,
|
||||||
|
.pBindingFlags = &flags[0],
|
||||||
|
.bindingCount = 5};
|
||||||
|
VkDescriptorSetLayoutCreateInfo layout_info = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||||
|
.pNext = &binding_flags,
|
||||||
|
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT,
|
||||||
|
.pBindings = &bindings[0],
|
||||||
|
.bindingCount = 5,
|
||||||
|
};
|
||||||
|
if (vkCreateDescriptorSetLayout(dev->device,
|
||||||
|
&layout_info,
|
||||||
|
dev->alloc_cb,
|
||||||
|
&bindless_registry.bindless_set_layout) != VK_SUCCESS) {
|
||||||
|
rtReportError("VK", "Failed to create the bindless descriptor set layout.");
|
||||||
|
return (rt_create_vk_bindless_registry_result){.result = RT_UNKNOWN_ERROR};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the descriptor pool */
|
||||||
|
{
|
||||||
|
VkDescriptorPoolSize pool_sizes[5] = {
|
||||||
|
{.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||||
|
.descriptorCount = r_VkBindlessUniformBufferDescriptors.ui},
|
||||||
|
{.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
.descriptorCount = r_VkBindlessStorageBufferDescriptors.ui},
|
||||||
|
{ .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
|
||||||
|
.descriptorCount = r_VkBindlessSampledImageDescriptors.ui },
|
||||||
|
{ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
.descriptorCount = r_VkBindlessStorageImageDescriptors.ui },
|
||||||
|
{ .type = VK_DESCRIPTOR_TYPE_SAMPLER,
|
||||||
|
.descriptorCount = r_VkBindlessSamplerDescriptors.ui },
|
||||||
|
};
|
||||||
|
|
||||||
|
VkDescriptorPoolCreateInfo pool_info = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||||
|
.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT,
|
||||||
|
.maxSets = 1,
|
||||||
|
.pPoolSizes = &pool_sizes[0],
|
||||||
|
.poolSizeCount = 5};
|
||||||
|
if (vkCreateDescriptorPool(dev->device,
|
||||||
|
&pool_info,
|
||||||
|
dev->alloc_cb,
|
||||||
|
&bindless_registry.bindless_set_pool) != VK_SUCCESS) {
|
||||||
|
rtReportError("VK", "Failed to create the bindless descriptor pool.");
|
||||||
|
vkDestroyDescriptorSetLayout(dev->device,
|
||||||
|
bindless_registry.bindless_set_layout,
|
||||||
|
dev->alloc_cb);
|
||||||
|
return (rt_create_vk_bindless_registry_result){.result = RT_UNKNOWN_ERROR};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate the global descriptor set */
|
||||||
|
{
|
||||||
|
VkDescriptorSetAllocateInfo alloc_info = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||||
|
.descriptorPool = bindless_registry.bindless_set_pool,
|
||||||
|
.descriptorSetCount = 1,
|
||||||
|
.pSetLayouts = &bindless_registry.bindless_set_layout};
|
||||||
|
if (vkAllocateDescriptorSets(dev->device, &alloc_info, &bindless_registry.bindless_set) !=
|
||||||
|
VK_SUCCESS) {
|
||||||
|
rtReportError("VK", "Failed to allocate the bindless descriptor set.");
|
||||||
|
vkDestroyDescriptorPool(dev->device,
|
||||||
|
bindless_registry.bindless_set_pool,
|
||||||
|
dev->alloc_cb);
|
||||||
|
vkDestroyDescriptorSetLayout(dev->device,
|
||||||
|
bindless_registry.bindless_set_layout,
|
||||||
|
dev->alloc_cb);
|
||||||
|
return (rt_create_vk_bindless_registry_result){.result = RT_UNKNOWN_ERROR};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Prepare the management data */
|
||||||
|
bindless_registry.uniform_buffer_reuse_stack =
|
||||||
|
AllocReuseStack(r_VkBindlessUniformBufferDescriptors.ui);
|
||||||
|
bindless_registry.storage_buffer_reuse_stack =
|
||||||
|
AllocReuseStack(r_VkBindlessStorageBufferDescriptors.ui);
|
||||||
|
bindless_registry.sampled_image_reuse_stack =
|
||||||
|
AllocReuseStack(r_VkBindlessSampledImageDescriptors.ui);
|
||||||
|
bindless_registry.storage_image_reuse_stack =
|
||||||
|
AllocReuseStack(r_VkBindlessStorageImageDescriptors.ui);
|
||||||
|
bindless_registry.sampler_reuse_stack = AllocReuseStack(r_VkBindlessSamplerDescriptors.ui);
|
||||||
|
|
||||||
|
bindless_registry.release_list =
|
||||||
|
calloc(ReleaseListCapacity(), sizeof(rt_vk_bindless_release_list_entry));
|
||||||
|
bindless_registry.release_list_length = 0;
|
||||||
|
|
||||||
|
bindless_registry.mutex = rtCreateMutex();
|
||||||
|
|
||||||
|
return (rt_create_vk_bindless_registry_result){.result = RT_SUCCESS,
|
||||||
|
.bindless_registry = bindless_registry};
|
||||||
|
}
|
||||||
|
|
||||||
|
void rtDestroyVkBindlessRegistry(rt_vk_bindless_registry *registry) {
|
||||||
|
rtDestroyMutex(registry->mutex);
|
||||||
|
free(registry->release_list);
|
||||||
|
free(registry->uniform_buffer_reuse_stack.indices);
|
||||||
|
free(registry->storage_buffer_reuse_stack.indices);
|
||||||
|
free(registry->sampled_image_reuse_stack.indices);
|
||||||
|
free(registry->storage_image_reuse_stack.indices);
|
||||||
|
free(registry->sampler_reuse_stack.indices);
|
||||||
|
vkDestroyDescriptorPool(registry->dev->device,
|
||||||
|
registry->bindless_set_pool,
|
||||||
|
registry->dev->alloc_cb);
|
||||||
|
vkDestroyDescriptorSetLayout(registry->dev->device,
|
||||||
|
registry->bindless_set_layout,
|
||||||
|
registry->dev->alloc_cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
RT_INLINE static rt_vk_bindless_handle MakeBindlessHandle(rt_vk_bindless_handle_type type,
|
||||||
|
uint32_t index) {
|
||||||
|
RT_ASSERT(index < (1u << 29), "");
|
||||||
|
rt_vk_bindless_handle handle = {.value = index | (type << 29)};
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
AcquireSlot(rt_vk_bindless_reuse_stack *reuse_stack, uint32_t *next_of_type, rt_cvar *max_cvar) {
|
||||||
|
uint32_t index = UINT32_MAX;
|
||||||
|
if (*next_of_type < max_cvar->ui) {
|
||||||
|
index = *next_of_type;
|
||||||
|
*next_of_type += 1;
|
||||||
|
} else if (reuse_stack->size > 0) {
|
||||||
|
index = reuse_stack->indices[--reuse_stack->size];
|
||||||
|
} else {
|
||||||
|
rtLog("VK", "No available descriptor set slots for requested resource.");
|
||||||
|
RT_DEBUGBREAK;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_vk_bindless_handle rtStoreUniformBuffer(rt_vk_bindless_registry *registry, VkBuffer buffer) {
|
||||||
|
rtLockMutex(registry->mutex);
|
||||||
|
uint32_t index = AcquireSlot(®istry->uniform_buffer_reuse_stack,
|
||||||
|
®istry->next_uniform_buffer,
|
||||||
|
&r_VkBindlessUniformBufferDescriptors);
|
||||||
|
if (index == UINT32_MAX) {
|
||||||
|
rtUnlockMutex(registry->mutex);
|
||||||
|
return (rt_vk_bindless_handle){RT_VK_INVALID_BINDLESS_HANDLE_VALUE};
|
||||||
|
}
|
||||||
|
|
||||||
|
VkDescriptorBufferInfo buffer_info = {
|
||||||
|
.buffer = buffer,
|
||||||
|
.range = VK_WHOLE_SIZE,
|
||||||
|
.offset = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkWriteDescriptorSet write = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||||
|
.dstBinding = registry->uniform_buffer_binding,
|
||||||
|
.dstSet = registry->bindless_set,
|
||||||
|
.dstArrayElement = index,
|
||||||
|
.pBufferInfo = &buffer_info,
|
||||||
|
};
|
||||||
|
|
||||||
|
vkUpdateDescriptorSets(registry->dev->device, 1, &write, 0, NULL);
|
||||||
|
rtUnlockMutex(registry->mutex);
|
||||||
|
|
||||||
|
return MakeBindlessHandle(RT_VK_BINDLESS_HANDLE_TYPE_UNIFORM_BUFFER, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_vk_bindless_handle rtStoreStorageBuffer(rt_vk_bindless_registry *registry, VkBuffer buffer) {
|
||||||
|
rtLockMutex(registry->mutex);
|
||||||
|
uint32_t index = AcquireSlot(®istry->storage_buffer_reuse_stack,
|
||||||
|
®istry->next_storage_buffer,
|
||||||
|
&r_VkBindlessStorageBufferDescriptors);
|
||||||
|
if (index == UINT32_MAX) {
|
||||||
|
rtUnlockMutex(registry->mutex);
|
||||||
|
return (rt_vk_bindless_handle){RT_VK_INVALID_BINDLESS_HANDLE_VALUE};
|
||||||
|
}
|
||||||
|
|
||||||
|
VkDescriptorBufferInfo buffer_info = {
|
||||||
|
.buffer = buffer,
|
||||||
|
.range = VK_WHOLE_SIZE,
|
||||||
|
.offset = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkWriteDescriptorSet write = {
|
||||||
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
.dstBinding = registry->storage_buffer_binding,
|
||||||
|
.dstSet = registry->bindless_set,
|
||||||
|
.dstArrayElement = index,
|
||||||
|
.pBufferInfo = &buffer_info,
|
||||||
|
};
|
||||||
|
|
||||||
|
vkUpdateDescriptorSets(registry->dev->device, 1, &write, 0, NULL);
|
||||||
|
rtUnlockMutex(registry->mutex);
|
||||||
|
|
||||||
|
return MakeBindlessHandle(RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_BUFFER, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_vk_bindless_handle rtStoreSampledImage(rt_vk_bindless_registry *registry, VkImageView image) {
|
||||||
|
rtLockMutex(registry->mutex);
|
||||||
|
uint32_t index = AcquireSlot(®istry->sampled_image_reuse_stack,
|
||||||
|
®istry->next_sampled_image,
|
||||||
|
&r_VkBindlessSampledImageDescriptors);
|
||||||
|
if (index == UINT32_MAX) {
|
||||||
|
rtUnlockMutex(registry->mutex);
|
||||||
|
return (rt_vk_bindless_handle){RT_VK_INVALID_BINDLESS_HANDLE_VALUE};
|
||||||
|
}
|
||||||
|
|
||||||
|
VkDescriptorImageInfo image_info = {
|
||||||
|
.sampler = VK_NULL_HANDLE,
|
||||||
        .imageView   = image,
        .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
    };

    VkWriteDescriptorSet write = {
        .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .descriptorCount = 1,
        .descriptorType  = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
        .dstBinding      = registry->sampled_image_binding,
        .dstSet          = registry->bindless_set,
        .dstArrayElement = index,
        .pImageInfo      = &image_info,
    };

    vkUpdateDescriptorSets(registry->dev->device, 1, &write, 0, NULL);
    rtUnlockMutex(registry->mutex);

    return MakeBindlessHandle(RT_VK_BINDLESS_HANDLE_TYPE_SAMPLED_IMAGE, index);
}

rt_vk_bindless_handle rtStoreStorageImage(rt_vk_bindless_registry *registry, VkImageView image) {
    rtLockMutex(registry->mutex);
    uint32_t index = AcquireSlot(&registry->storage_image_reuse_stack,
                                 &registry->next_storage_image,
                                 &r_VkBindlessStorageImageDescriptors);
    if (index == UINT32_MAX) {
        rtUnlockMutex(registry->mutex);
        return (rt_vk_bindless_handle){RT_VK_INVALID_BINDLESS_HANDLE_VALUE};
    }

    VkDescriptorImageInfo image_info = {
        .sampler     = VK_NULL_HANDLE,
        .imageView   = image,
        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
    };

    VkWriteDescriptorSet write = {
        .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .descriptorCount = 1,
        .descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
        .dstBinding      = registry->storage_image_binding,
        .dstSet          = registry->bindless_set,
        .dstArrayElement = index,
        .pImageInfo      = &image_info,
    };

    vkUpdateDescriptorSets(registry->dev->device, 1, &write, 0, NULL);
    rtUnlockMutex(registry->mutex);

    return MakeBindlessHandle(RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_IMAGE, index);
}

rt_vk_bindless_handle rtStoreSampler(rt_vk_bindless_registry *registry, VkSampler sampler) {
    rtLockMutex(registry->mutex);
    uint32_t index = AcquireSlot(&registry->sampler_reuse_stack,
                                 &registry->next_sampler,
                                 &r_VkBindlessSamplerDescriptors);
    if (index == UINT32_MAX) {
        rtUnlockMutex(registry->mutex);
        return (rt_vk_bindless_handle){RT_VK_INVALID_BINDLESS_HANDLE_VALUE};
    }

    VkDescriptorImageInfo image_info = {
        .sampler     = sampler,
        .imageView   = VK_NULL_HANDLE,
        .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };

    VkWriteDescriptorSet write = {
        .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .descriptorCount = 1,
        .descriptorType  = VK_DESCRIPTOR_TYPE_SAMPLER,
        .dstBinding      = registry->sampler_binding,
        .dstSet          = registry->bindless_set,
        .dstArrayElement = index,
        .pImageInfo      = &image_info,
    };

    vkUpdateDescriptorSets(registry->dev->device, 1, &write, 0, NULL);
    rtUnlockMutex(registry->mutex);

    return MakeBindlessHandle(RT_VK_BINDLESS_HANDLE_TYPE_SAMPLER, index);
}

void rtFreeBindlessResource(rt_vk_bindless_registry *registry, rt_vk_bindless_handle handle) {
    if (!rtIsVkBindlessHandleValid(handle))
        return;
    rt_vk_bindless_handle_type type = rtGetVkBindlessHandleType(handle);
    RT_VERIFY(type >= RT_VK_BINDLESS_HANDLE_TYPE_UNIFORM_BUFFER &&
              type <= RT_VK_BINDLESS_HANDLE_TYPE_SAMPLER);

    rtLockMutex(registry->mutex);
    RT_ASSERT(registry->release_list_length < ReleaseListCapacity(),
              "Ran out of release list space.");
    registry->release_list[registry->release_list_length].handle = handle;
    registry->release_list[registry->release_list_length].frame =
        registry->dev->current_frame_id + registry->dev->max_frames_in_flight;
    ++registry->release_list_length;
    rtUnlockMutex(registry->mutex);
}

void rtVkBindlessRegistryOnBeginFrame(rt_vk_bindless_registry *registry) {
    /* Free resources that can be freed */
    rtLockMutex(registry->mutex);
    for (uint32_t i = 0; i < registry->release_list_length; ++i) {
        if (registry->release_list[i].frame == registry->dev->current_frame_id) {
            uint32_t index = rtGetVkBindlessHandleIndex(registry->release_list[i].handle);
            switch (rtGetVkBindlessHandleType(registry->release_list[i].handle)) {
            case RT_VK_BINDLESS_HANDLE_TYPE_UNIFORM_BUFFER:
                RT_ASSERT(registry->uniform_buffer_reuse_stack.size <
                              r_VkBindlessUniformBufferDescriptors.ui,
                          "Stack overflow");
                registry->uniform_buffer_reuse_stack
                    .indices[registry->uniform_buffer_reuse_stack.size++] = index;
                break;
            case RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_BUFFER:
                RT_ASSERT(registry->storage_buffer_reuse_stack.size <
                              r_VkBindlessStorageBufferDescriptors.ui,
                          "Stack overflow");
                registry->storage_buffer_reuse_stack
                    .indices[registry->storage_buffer_reuse_stack.size++] = index;
                break;
            case RT_VK_BINDLESS_HANDLE_TYPE_SAMPLED_IMAGE:
                RT_ASSERT(registry->sampled_image_reuse_stack.size <
                              r_VkBindlessSampledImageDescriptors.ui,
                          "Stack overflow");
                registry->sampled_image_reuse_stack
                    .indices[registry->sampled_image_reuse_stack.size++] = index;
                break;
            case RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_IMAGE:
                RT_ASSERT(registry->storage_image_reuse_stack.size <
                              r_VkBindlessStorageImageDescriptors.ui,
                          "Stack overflow");
                registry->storage_image_reuse_stack
                    .indices[registry->storage_image_reuse_stack.size++] = index;
                break;
            case RT_VK_BINDLESS_HANDLE_TYPE_SAMPLER:
                RT_ASSERT(registry->sampler_reuse_stack.size < r_VkBindlessSamplerDescriptors.ui,
                          "Stack overflow");
                registry->sampler_reuse_stack.indices[registry->sampler_reuse_stack.size++] = index;
                break;
            default:
                rtLog("VK",
                      "Invalid bindless handle type: %u",
                      rtGetVkBindlessHandleType(registry->release_list[i].handle));
                break;
            }

            /* Pop and swap */
            if (i < registry->release_list_length - 1) {
                registry->release_list[i] =
                    registry->release_list[registry->release_list_length - 1];
            }
            --i;
            --registry->release_list_length;
        }
    }
    rtUnlockMutex(registry->mutex);
}
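For context, a minimal caller-side sketch of the registry API shown above. This assumes a `registry` pointer and a `view` image view that were created elsewhere in the renderer; it is an illustration, not engine code from this diff.

/* Hedged sketch: publish an image view to the bindless set and defer its release.
 * `registry` and `view` are assumed to exist; the shader-side indexing is illustrative. */
rt_vk_bindless_handle h = rtStoreSampledImage(registry, view);
if (rtIsVkBindlessHandleValid(h)) {
    uint32_t slot = rtGetVkBindlessHandleIndex(h); /* index handed to shaders, e.g. via push constants */
    /* ... record draws that sample the bindless texture array at `slot` ... */
    rtFreeBindlessResource(registry, h); /* the slot is only recycled max_frames_in_flight frames later */
}

The deferral in rtFreeBindlessResource exists because descriptors may still be referenced by command buffers that are in flight; rtVkBindlessRegistryOnBeginFrame returns the slot to its reuse stack once the releasing frame id comes around again.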
105
src/renderer/vk/bindless_registry.h
Normal file
@ -0,0 +1,105 @@
#ifndef RT_VK_BINDLESS_REGISTRY_H
#define RT_VK_BINDLESS_REGISTRY_H

#include <runtime/runtime.h>
#include <runtime/threading.h>
#include <stdbool.h>
#include <volk/volk.h>

struct rt_vk_device;

enum { RT_VK_INVALID_BINDLESS_HANDLE_VALUE = UINT32_MAX };

typedef enum {
    RT_VK_BINDLESS_HANDLE_TYPE_UNIFORM_BUFFER,
    RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_BUFFER,
    RT_VK_BINDLESS_HANDLE_TYPE_SAMPLED_IMAGE,
    RT_VK_BINDLESS_HANDLE_TYPE_STORAGE_IMAGE,
    RT_VK_BINDLESS_HANDLE_TYPE_SAMPLER,
} rt_vk_bindless_handle_type;

/* Handle to a bindless resource.
 * The layout is:
 * | type : 3 | index : 29 |
 * MSB                  LSB
 */
typedef struct {
    uint32_t value;
} rt_vk_bindless_handle;

/* Utilities for bindless handles */

static RT_INLINE bool rtIsVkBindlessHandleValid(rt_vk_bindless_handle handle) {
    return handle.value != RT_VK_INVALID_BINDLESS_HANDLE_VALUE;
}

static RT_INLINE rt_vk_bindless_handle_type rtGetVkBindlessHandleType(rt_vk_bindless_handle handle) {
    return (handle.value >> 29) & 0x7;
}

static RT_INLINE uint32_t rtGetVkBindlessHandleIndex(rt_vk_bindless_handle handle) {
    return handle.value & ((1u << 29) - 1);
}

typedef struct {
    uint32_t *indices;
    uint32_t size;
} rt_vk_bindless_reuse_stack;

typedef struct {
    rt_vk_bindless_handle handle;
    /* Frame id on which the handle should be released */
    uint32_t frame;
} rt_vk_bindless_release_list_entry;

/* The bindless registry manages the global descriptor set of bindless resources. */
typedef struct {
    struct rt_vk_device *dev;
    rt_mutex *mutex;

    VkDescriptorSetLayout bindless_set_layout;
    VkDescriptorPool bindless_set_pool;
    VkDescriptorSet bindless_set;

    uint32_t uniform_buffer_binding;
    uint32_t storage_buffer_binding;
    uint32_t sampled_image_binding;
    uint32_t storage_image_binding;
    uint32_t sampler_binding;

    rt_vk_bindless_reuse_stack uniform_buffer_reuse_stack;
    rt_vk_bindless_reuse_stack storage_buffer_reuse_stack;
    rt_vk_bindless_reuse_stack sampled_image_reuse_stack;
    rt_vk_bindless_reuse_stack storage_image_reuse_stack;
    rt_vk_bindless_reuse_stack sampler_reuse_stack;
    uint32_t next_uniform_buffer;
    uint32_t next_storage_buffer;
    uint32_t next_sampled_image;
    uint32_t next_storage_image;
    uint32_t next_sampler;

    rt_vk_bindless_release_list_entry *release_list;
    uint32_t release_list_length;
} rt_vk_bindless_registry;

typedef struct {
    rt_result result;
    rt_vk_bindless_registry bindless_registry;
} rt_create_vk_bindless_registry_result;

rt_create_vk_bindless_registry_result rtCreateVkBindlessRegistry(struct rt_vk_device *dev);

void rtDestroyVkBindlessRegistry(rt_vk_bindless_registry *registry);

void rtVkBindlessRegistryOnBeginFrame(rt_vk_bindless_registry *registry);

rt_vk_bindless_handle rtStoreUniformBuffer(rt_vk_bindless_registry *registry, VkBuffer buffer);
rt_vk_bindless_handle rtStoreStorageBuffer(rt_vk_bindless_registry *registry, VkBuffer buffer);
rt_vk_bindless_handle rtStoreSampledImage(rt_vk_bindless_registry *registry, VkImageView image);
rt_vk_bindless_handle rtStoreStorageImage(rt_vk_bindless_registry *registry, VkImageView image);
rt_vk_bindless_handle rtStoreSampler(rt_vk_bindless_registry *registry, VkSampler sampler);

void rtFreeBindlessResource(rt_vk_bindless_registry *registry, rt_vk_bindless_handle handle);

#endif
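The header documents the handle layout (3 type bits in the MSBs, 29 index bits in the LSBs) and provides the two accessors. The matching packing helper lives in bindless_registry.c and is not part of this diff, so the following is a hedged sketch of what the accessors imply, not the engine's actual implementation.

/* Hedged sketch of the packing implied by rtGetVkBindlessHandleType/Index above.
 * The real MakeBindlessHandle in bindless_registry.c may differ in detail. */
static inline rt_vk_bindless_handle MakeBindlessHandleSketch(rt_vk_bindless_handle_type type,
                                                             uint32_t index) {
    rt_vk_bindless_handle handle;
    handle.value = ((uint32_t)type << 29) | (index & ((1u << 29) - 1));
    return handle;
}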
196
src/renderer/vk/command_buffers.c
Normal file
@ -0,0 +1,196 @@
#include "command_buffers.h"
#include "device.h"

/* We need one command pool per frame per thread. */
#include <stdlib.h>
#include <volk/volk.h>
#include <runtime/runtime.h>
#include <runtime/atomics.h>

struct rt_vk_command_pool_set {
    VkCommandPool graphics;
    VkCommandPool compute;
    VkCommandPool transfer;
};

typedef struct {
    long indices[3];
} rt_thread_pool_indices;

static RT_THREAD_LOCAL rt_thread_pool_indices t_pool_indices;
static RT_THREAD_LOCAL bool t_are_indices_initialized[3] = {false, false, false};

rt_vk_command_pool_array rtReserveCommandPoolArray(long max_threads) {
    rt_vk_command_pool_array array;
    array.next_unused = 0;
    array.capacity    = 0;
    array.pools       = calloc(max_threads, sizeof(rt_vk_command_pool_set));
    if (!array.pools)
        return array;
    array.capacity = max_threads;
    return array;
}

void rtReleaseCommandPoolArray(rt_vk_command_pool_array *array) {
    free(array->pools);
    array->pools       = NULL;
    array->capacity    = 0;
    array->next_unused = 0;
}

static rt_vk_command_pool_set *GetThreadsPoolSet(rt_vk_device *dev, uint32_t frame_id) {
    uint32_t slot = frame_id % dev->max_frames_in_flight;
    if (!t_are_indices_initialized[slot]) {
        /* Alloc the set */
        rt_vk_command_pool_set *set = NULL;
        long index = rtAtomic32Inc(&dev->frames[slot].command_pools.next_unused) - 1; /* Inc returns the new value */
        if (index >= dev->frames[slot].command_pools.capacity) {
            rtReportError("VK", "Ran out of space for per-thread command buffers. Try to increase r_VkMaxThreads");
            return NULL;
        }
        set = &dev->frames[slot].command_pools.pools[index];

        /* Create the pools */
        {
            VkCommandPoolCreateInfo pool_info = {
                .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
                .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
                .queueFamilyIndex = dev->graphics_family,
            };
            if (vkCreateCommandPool(dev->device, &pool_info, dev->alloc_cb, &set->graphics) != VK_SUCCESS) {
                rtReportError("VK", "vkCreateCommandPool failed.");
                return NULL;
            }
#ifdef RT_DEBUG
            uint32_t thread_id = rtGetCurrentThreadId();
            char name[128];
            rtSPrint(name, 128, "Graphics Command Pool (FrameSlot %u, Thread %u)", slot, thread_id);
            VkDebugUtilsObjectNameInfoEXT name_info = {
                .sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
                .objectHandle = (uint64_t)set->graphics,
                .objectType   = VK_OBJECT_TYPE_COMMAND_POOL,
                .pObjectName  = name,
            };
            vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);
#endif
        }
        if (dev->compute_family != dev->graphics_family) {
            VkCommandPoolCreateInfo pool_info = {
                .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
                .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
                .queueFamilyIndex = dev->compute_family,
            };
            if (vkCreateCommandPool(dev->device, &pool_info, dev->alloc_cb, &set->compute) != VK_SUCCESS) {
                rtReportError("VK", "vkCreateCommandPool failed.");
                return NULL;
            }
#ifdef RT_DEBUG
            uint32_t thread_id = rtGetCurrentThreadId();
            char name[128];
            rtSPrint(name, 128, "Compute Command Pool (FrameSlot %u, Thread %u)", slot, thread_id);
            VkDebugUtilsObjectNameInfoEXT name_info = {
                .sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
                .objectHandle = (uint64_t)set->compute,
                .objectType   = VK_OBJECT_TYPE_COMMAND_POOL,
                .pObjectName  = name,
            };
            vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);
#endif
        } else {
            set->compute = set->graphics;
        }
        if (dev->transfer_family != dev->graphics_family && dev->transfer_family != dev->compute_family) {
            VkCommandPoolCreateInfo pool_info = {
                .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
                .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
                .queueFamilyIndex = dev->transfer_family,
            };
            if (vkCreateCommandPool(dev->device, &pool_info, dev->alloc_cb, &set->transfer) != VK_SUCCESS) {
                rtReportError("VK", "vkCreateCommandPool failed.");
                return NULL;
            }
#ifdef RT_DEBUG
            uint32_t thread_id = rtGetCurrentThreadId();
            char name[128];
            rtSPrint(name, 128, "Transfer Command Pool (FrameSlot %u, Thread %u)", slot, thread_id);
            VkDebugUtilsObjectNameInfoEXT name_info = {
                .sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
                .objectHandle = (uint64_t)set->transfer,
                .objectType   = VK_OBJECT_TYPE_COMMAND_POOL,
                .pObjectName  = name,
            };
            vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);
#endif
        } else if (dev->transfer_family == dev->graphics_family) {
            set->transfer = set->graphics;
        } else {
            RT_ASSERT(dev->transfer_family == dev->compute_family, "");
            set->transfer = set->compute;
        }

        t_pool_indices.indices[slot]    = index;
        t_are_indices_initialized[slot] = true;
    }

    long index = t_pool_indices.indices[slot];
#ifdef RT_DEBUG
    if (index >= dev->frames[slot].command_pools.next_unused)
        return NULL;
#endif
    return &dev->frames[slot].command_pools.pools[index];
}

VkCommandPool rtGetGraphicsCommandPool(rt_vk_device *dev) {
    rt_vk_command_pool_set *set = GetThreadsPoolSet(dev, dev->current_frame_id);
    return RT_VERIFY(set)->graphics;
}

VkCommandPool rtGetComputeCommandPool(rt_vk_device *dev) {
    rt_vk_command_pool_set *set = GetThreadsPoolSet(dev, dev->current_frame_id);
    return RT_VERIFY(set)->compute;
}

VkCommandPool rtGetTransferCommandPool(rt_vk_device *dev) {
    rt_vk_command_pool_set *set = GetThreadsPoolSet(dev, dev->current_frame_id);
    return RT_VERIFY(set)->transfer;
}

VkCommandBuffer rtAllocateGraphicsCommandBuffer(rt_vk_device *dev) {
    VkCommandPool pool = rtGetGraphicsCommandPool(dev);
    VkCommandBufferAllocateInfo alloc_info = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool        = pool,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkCommandBuffer cmdbuf = VK_NULL_HANDLE;
    vkAllocateCommandBuffers(dev->device, &alloc_info, &cmdbuf);
    return cmdbuf;
}

VkCommandBuffer rtAllocateComputeCommandBuffer(rt_vk_device *dev) {
    VkCommandPool pool = rtGetComputeCommandPool(dev);
    VkCommandBufferAllocateInfo alloc_info = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool        = pool,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkCommandBuffer cmdbuf = VK_NULL_HANDLE;
    vkAllocateCommandBuffers(dev->device, &alloc_info, &cmdbuf);
    return cmdbuf;
}

VkCommandBuffer rtAllocateTransferCommandBuffer(rt_vk_device *dev) {
    VkCommandPool pool = rtGetTransferCommandPool(dev);
    VkCommandBufferAllocateInfo alloc_info = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool        = pool,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkCommandBuffer cmdbuf = VK_NULL_HANDLE;
    vkAllocateCommandBuffers(dev->device, &alloc_info, &cmdbuf);
    return cmdbuf;
}
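Because every thread lazily claims its own pool set per frame slot, recording never needs a lock. A hedged sketch of how a worker thread would typically use the allocator above; `dev` is assumed to point at the engine's rt_vk_device, and the recorded commands are placeholders.

/* Hedged sketch: per-thread recording with the allocator above. */
VkCommandBuffer cmd = rtAllocateGraphicsCommandBuffer(dev);
VkCommandBufferBeginInfo begin = {
    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(cmd, &begin);
/* ... record commands; no locking is needed because the pool belongs to this thread ... */
vkEndCommandBuffer(cmd);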
29
src/renderer/vk/command_buffers.h
Normal file
@ -0,0 +1,29 @@
#ifndef RT_VK_COMMAND_LISTS_H
#define RT_VK_COMMAND_LISTS_H

#include <volk/volk.h>

struct rt_vk_device;

typedef struct rt_vk_command_pool_set rt_vk_command_pool_set;

/* Contains an array of command pools. Threads save an index into the array
 * to access their own command pool set. */
typedef struct {
    rt_vk_command_pool_set *pools;
    long capacity;
    long next_unused;
} rt_vk_command_pool_array;

rt_vk_command_pool_array rtReserveCommandPoolArray(long max_threads);
void rtReleaseCommandPoolArray(rt_vk_command_pool_array *array);

VkCommandPool rtGetGraphicsCommandPool(struct rt_vk_device *dev);
VkCommandPool rtGetComputeCommandPool(struct rt_vk_device *dev);
VkCommandPool rtGetTransferCommandPool(struct rt_vk_device *dev);

VkCommandBuffer rtAllocateGraphicsCommandBuffer(struct rt_vk_device *dev);
VkCommandBuffer rtAllocateComputeCommandBuffer(struct rt_vk_device *dev);
VkCommandBuffer rtAllocateTransferCommandBuffer(struct rt_vk_device *dev);

#endif
771
src/renderer/vk/device.c
Normal file
@ -0,0 +1,771 @@
#include <malloc.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include <runtime/runtime.h>
#include <runtime/config.h>
#include <runtime/timing.h>

#include "device.h"
#include "physical_resource_manager.h"

#define TARGET_API_VERSION VK_API_VERSION_1_3

RT_CVAR_I(r_VkEnableAPIAllocTracking,
          "Enable tracking of allocations done by the vulkan api. [0/1] Default: 0",
          0);

RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");

RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);
RT_CVAR_UI(r_VkMaxThreads, "Maximum number of threads used. (Default: 128)", 128);

static VkAllocationCallbacks _tracking_alloc_cbs;

static const char *AllocationScopeToString(VkSystemAllocationScope scope) {
    switch (scope) {
    case VK_SYSTEM_ALLOCATION_SCOPE_COMMAND:
        return "COMMAND";
    case VK_SYSTEM_ALLOCATION_SCOPE_OBJECT:
        return "OBJECT";
    case VK_SYSTEM_ALLOCATION_SCOPE_CACHE:
        return "CACHE";
    case VK_SYSTEM_ALLOCATION_SCOPE_DEVICE:
        return "DEVICE";
    case VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE:
        return "INSTANCE";
    default:
        return "UNKNOWN";
    }
}

static void *
TrackAllocation(void *userData, size_t size, size_t alignment, VkSystemAllocationScope scope) {
    rtLog("VK",
          "Allocation. Size: %zu, Alignment: %zu, Scope: %s",
          size,
          alignment,
          AllocationScopeToString(scope));
#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    return aligned_alloc(alignment, size);
#endif
}

static void *TrackReallocation(void *userData,
                               void *original,
                               size_t size,
                               size_t alignment,
                               VkSystemAllocationScope scope) {
    rtLog("VK",
          "Reallocation. Size: %zu, Alignment: %zu, Scope: %s",
          size,
          alignment,
          AllocationScopeToString(scope));
    return realloc(original, size);
}

static void TrackFree(void *userData, void *memory) {
    free(memory);
}

static VkBool32 VKAPI_PTR
DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                      VkDebugUtilsMessageTypeFlagsEXT types,
                      const VkDebugUtilsMessengerCallbackDataEXT *callbackData,
                      void *userData) {
    if (severity < VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
        return VK_FALSE;

    const char *severity_str = "<UNKNOWN>";
    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
        severity_str = "WARNING";
    else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
        severity_str = "ERROR";
    rtLog("VK", "[%s] %s", severity_str, callbackData->pMessage);
    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
        RT_DEBUGBREAK;
    return VK_FALSE;
}

static rt_result CreateInstance(rt_vk_device *dev) {
    VkResult result = volkInitialize();
    if (result != VK_SUCCESS) {
        rtReportError("vk", "Initialization failed: volkInitialize()");
        return 1;
    }

    VkApplicationInfo app_info = {
        .apiVersion         = TARGET_API_VERSION,
        .applicationVersion = 0x00001000,
        .engineVersion      = 0x00001000,
        .pEngineName        = "voyageEngine",
        .pApplicationName   = "Voyage",
    };

    const char *extensions[] = {
        VK_KHR_SURFACE_EXTENSION_NAME,
#ifdef _WIN32
        "VK_KHR_win32_surface",
#elif defined(RT_USE_XLIB)
        "VK_KHR_xlib_surface",
#endif

#ifdef RT_DEBUG
        VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
#endif
    };

    const char *layers[1];
    unsigned int layer_count = 0;
#ifdef RT_DEBUG
    /* Search for layers we want to enable */
    uint32_t available_layer_count = 0;
    result = vkEnumerateInstanceLayerProperties(&available_layer_count, NULL);
    if (result == VK_SUCCESS) {
        VkLayerProperties *props = calloc(available_layer_count, sizeof(VkLayerProperties));
        if (props) {
            vkEnumerateInstanceLayerProperties(&available_layer_count, props);
            for (uint32_t i = 0; i < available_layer_count; ++i) {
                if (strcmp(props[i].layerName, "VK_LAYER_KHRONOS_validation") == 0) {
                    layers[0]   = "VK_LAYER_KHRONOS_validation";
                    layer_count = 1;
                    break;
                }
            }
            free(props);
        } else {
            rtLog("VK", "Failed to allocate storage for instance layer properties.");
        }
    } else {
        rtLog("VK", "vkEnumerateInstanceLayerProperties failed.");
    }
#endif

    VkInstanceCreateInfo instance_info = {
        .sType                   = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo        = &app_info,
        .ppEnabledExtensionNames = extensions,
        .enabledExtensionCount   = RT_ARRAY_COUNT(extensions),
        .ppEnabledLayerNames     = layers,
        .enabledLayerCount       = layer_count,
    };
    result = vkCreateInstance(&instance_info, dev->alloc_cb, &dev->instance);
    if (result != VK_SUCCESS) {
        rtReportError("VK", "Failed to create the vulkan instance.");
        return 1;
    }
    volkLoadInstance(dev->instance);

#ifdef RT_DEBUG
    /* Create the debug utils messenger */
    VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
        .sType           = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
        .messageType     = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
        .pfnUserCallback = DebugUtilsMessengerCb,
    };
    vkCreateDebugUtilsMessengerEXT(dev->instance,
                                   &messenger_info,
                                   dev->alloc_cb,
                                   &dev->messenger);
#endif
    return RT_SUCCESS;
}

static rt_result CreateSurface(const rt_renderer_window_info *info, rt_vk_device *dev) {
#ifdef _WIN32
    dev->native_window.hInstance = info->hInstance;
    dev->native_window.hWnd      = info->hWnd;
    VkWin32SurfaceCreateInfoKHR surface_info = {
        .sType     = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
        .hinstance = info->hInstance,
        .hwnd      = info->hWnd,
    };
    if (vkCreateWin32SurfaceKHR(dev->instance, &surface_info, dev->alloc_cb, &dev->surface) ==
        VK_SUCCESS)
        return RT_SUCCESS;
    else
        return RT_UNKNOWN_ERROR;
#elif defined(RT_USE_XLIB)
    dev->native_window.display = info->display;
    dev->native_window.window  = info->window;
    VkXlibSurfaceCreateInfoKHR surface_info = {
        .sType  = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
        .dpy    = info->display,
        .window = info->window,
    };
    if (vkCreateXlibSurfaceKHR(dev->instance, &surface_info, dev->alloc_cb, &dev->surface) == VK_SUCCESS)
        return RT_SUCCESS;
    else
        return RT_UNKNOWN_ERROR;
#endif
}

typedef struct {
    uint32_t graphics;
    uint32_t compute;
    uint32_t present;
    uint32_t transfer;
} rt_queue_indices;

static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
    rt_queue_indices indices = {.graphics = UINT32_MAX,
                                .compute  = UINT32_MAX,
                                .present  = UINT32_MAX,
                                .transfer = UINT32_MAX};

    uint32_t count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
    VkQueueFamilyProperties *props = calloc(count, sizeof(VkQueueFamilyProperties));
    if (!props) {
        return indices;
    }
    vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, props);
    for (uint32_t i = 0; i < count; ++i) {
        if (props[i].queueCount == 0)
            continue;
        if ((props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
            indices.graphics = i;
        if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
            indices.compute = i;
        if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
            indices.transfer = i;

        VkBool32 present_supported = VK_FALSE;
        vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
        if (present_supported)
            indices.present = i;
    }

    if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
        indices.transfer = indices.graphics;
    else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
        indices.transfer = indices.compute;

    free(props);
    return indices;
}

static bool CheckDeviceExtensionSupported(VkPhysicalDevice phys_dev) {
    const char *required_extensions[] = {
        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    };

    uint32_t extension_count;
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, NULL);

    VkExtensionProperties *supported_extensions =
        calloc(extension_count, sizeof(VkExtensionProperties));
    if (!supported_extensions)
        return false;
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, supported_extensions);

    bool supported = true;
    for (uint32_t i = 0; i < RT_ARRAY_COUNT(required_extensions); ++i) {
        bool found = false;
        for (uint32_t j = 0; j < extension_count; ++j) {
            if (strncmp(supported_extensions[j].extensionName,
                        required_extensions[i],
                        VK_MAX_EXTENSION_NAME_SIZE) == 0) {
                found = true;
                break;
            }
        }
        if (!found) {
            supported = false;
            VkPhysicalDeviceProperties props;
            vkGetPhysicalDeviceProperties(phys_dev, &props);
            rtLog("VK", "Device %s does not support the required extension %s",
                  props.deviceName,
                  required_extensions[i]);
            goto out;
        }
    }

out:
    free(supported_extensions);
    return supported;
}

static rt_result ChoosePhysicalDevice(rt_vk_device *dev) {

    dev->phys_device = VK_NULL_HANDLE;
    uint32_t phys_device_count = 0;
    VkResult result = vkEnumeratePhysicalDevices(dev->instance, &phys_device_count, NULL);
    if (result != VK_SUCCESS) {
        rtReportError("VK", "Failed to enumerate the physical devices.");
        return 2;
    }
    VkPhysicalDevice *phys_devices = calloc(phys_device_count, sizeof(VkPhysicalDevice));
    if (!phys_devices) {
        rtReportError("VK", "Failed to enumerate the physical devices: Out of memory.");
        return 2;
    }
    vkEnumeratePhysicalDevices(dev->instance, &phys_device_count, phys_devices);

    uint32_t highscore  = 0;
    uint32_t best_index = phys_device_count;
    for (uint32_t i = 0; i < phys_device_count; ++i) {
        VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
        };
        VkPhysicalDeviceSynchronization2Features synchronization2_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
            .pNext = &timeline_semaphore_features,
        };
        VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
            .pNext = &synchronization2_features,
        };
        VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
            .pNext = &dynamic_rendering_features,
        };
        VkPhysicalDeviceFeatures2 features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
            .pNext = &descriptor_indexing_features,
        };
        vkGetPhysicalDeviceFeatures2(phys_devices[i], &features);

        VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
            .pNext = NULL,
        };
        VkPhysicalDeviceProperties2 props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
            .pNext = &descriptor_indexing_props,
        };
        vkGetPhysicalDeviceProperties2(phys_devices[i], &props);

        if (!CheckDeviceExtensionSupported(phys_devices[i]))
            continue;

        rt_queue_indices indices = RetrieveQueueIndices(phys_devices[i], dev->surface);
        if (indices.compute == UINT32_MAX || indices.present == UINT32_MAX ||
            indices.graphics == UINT32_MAX)
            continue;

        if (!synchronization2_features.synchronization2 ||
            !dynamic_rendering_features.dynamicRendering ||
            !timeline_semaphore_features.timelineSemaphore)
            continue;

        /* Check for bindless support */
        if (!descriptor_indexing_features.runtimeDescriptorArray ||
            !descriptor_indexing_features.descriptorBindingPartiallyBound)
            continue;

        uint32_t score = 0;

        if (props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
            score += 100;

        score += (props.properties.limits.maxFramebufferWidth / 100) *
                 (props.properties.limits.maxFramebufferHeight / 100);

        score +=
            (descriptor_indexing_props.shaderStorageBufferArrayNonUniformIndexingNative) ? 100 : 0;
        score +=
            (descriptor_indexing_props.shaderSampledImageArrayNonUniformIndexingNative) ? 100 : 0;

        if (score > highscore) {
            highscore  = score;
            best_index = i;
        }

        if (strncmp(props.properties.deviceName,
                    r_VkPhysDeviceName.s,
                    VK_MAX_PHYSICAL_DEVICE_NAME_SIZE) == 0) {
            best_index = i;
            break;
        }
    }
    if (best_index < phys_device_count) {
        dev->phys_device = phys_devices[best_index];

        VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
            .pNext = NULL,
        };
        VkPhysicalDeviceProperties2 props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
            .pNext = &descriptor_indexing_props,
        };
        VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
        };
        VkPhysicalDeviceFeatures2 features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
            .pNext = &descriptor_indexing_features,
        };
        vkGetPhysicalDeviceFeatures2(phys_devices[best_index], &features);
        vkGetPhysicalDeviceProperties2(phys_devices[best_index], &props);

        dev->phys_device_props            = props.properties;
        dev->descriptor_indexing_props    = descriptor_indexing_props;
        dev->phys_device_features         = features.features;
        dev->descriptor_indexing_features = descriptor_indexing_features;
    }
    free(phys_devices);

    if (dev->phys_device == VK_NULL_HANDLE) {
        rtReportError("vk", "Failed to find a suitable physical device.");
        return 3;
    }
    return RT_SUCCESS;
}

static rt_result CreateDevice(rt_vk_device *dev) {
    const char *extensions[] = {
        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    };

    rt_queue_indices queue_indices = RetrieveQueueIndices(dev->phys_device, dev->surface);

    dev->compute_family  = queue_indices.compute;
    dev->graphics_family = queue_indices.graphics;
    dev->present_family  = queue_indices.present;
    dev->transfer_family = queue_indices.transfer;

    float priority = 1.f;

    uint32_t distinct_queue_count = 1;
    VkDeviceQueueCreateInfo queue_info[4];
    queue_info[0].sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queue_info[0].pNext            = NULL;
    queue_info[0].flags            = 0;
    queue_info[0].queueCount       = 1;
    queue_info[0].queueFamilyIndex = queue_indices.graphics;
    queue_info[0].pQueuePriorities = &priority;
    dev->unique_families[0] = queue_indices.graphics;
    if (queue_indices.compute != queue_indices.graphics) {
        queue_info[1].sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[1].pNext            = NULL;
        queue_info[1].flags            = 0;
        queue_info[1].queueCount       = 1;
        queue_info[1].queueFamilyIndex = queue_indices.compute;
        queue_info[1].pQueuePriorities = &priority;
        dev->unique_families[distinct_queue_count] = queue_indices.compute;
        ++distinct_queue_count;
    }
    if (queue_indices.present != queue_indices.graphics &&
        queue_indices.present != queue_indices.compute) {
        queue_info[distinct_queue_count].sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[distinct_queue_count].pNext            = NULL;
        queue_info[distinct_queue_count].flags            = 0;
        queue_info[distinct_queue_count].queueCount       = 1;
        queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
        queue_info[distinct_queue_count].pQueuePriorities = &priority;
        dev->unique_families[distinct_queue_count] = queue_indices.present;
        ++distinct_queue_count;
    }
    if (queue_indices.transfer != queue_indices.graphics &&
        queue_indices.transfer != queue_indices.compute &&
        queue_indices.transfer != queue_indices.present) {
        queue_info[distinct_queue_count].sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[distinct_queue_count].pNext            = NULL;
        queue_info[distinct_queue_count].flags            = 0;
        queue_info[distinct_queue_count].queueCount       = 1;
        queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
        queue_info[distinct_queue_count].pQueuePriorities = &priority;
        dev->unique_families[distinct_queue_count] = queue_indices.transfer;
        ++distinct_queue_count;
    }
    dev->unique_family_count = distinct_queue_count;

    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
    };
    VkPhysicalDeviceSynchronization2Features synchronization2_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
        .pNext = &timeline_semaphore_features,
    };
    VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
        .pNext = &synchronization2_features,
    };
    VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
        .pNext = &dynamic_rendering_features,
    };
    VkPhysicalDeviceFeatures2 features = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
                                          .pNext = &indexing_features};
    vkGetPhysicalDeviceFeatures2(dev->phys_device, &features);

    RT_ASSERT(indexing_features.runtimeDescriptorArray &&
                  indexing_features.descriptorBindingPartiallyBound,
              "We require a device that supports bindless vulkan.");

    VkDeviceCreateInfo device_info = {
        .sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext                   = &features,
        .enabledExtensionCount   = RT_ARRAY_COUNT(extensions),
        .ppEnabledExtensionNames = extensions,
        .pQueueCreateInfos       = queue_info,
        .queueCreateInfoCount    = distinct_queue_count,
    };
    if (vkCreateDevice(dev->phys_device, &device_info, dev->alloc_cb, &dev->device) !=
        VK_SUCCESS) {
        rtReportError("VK", "Device creation failed.");
        return 10;
    }

    vkGetDeviceQueue(dev->device, queue_indices.graphics, 0, &dev->graphics_queue);
    vkGetDeviceQueue(dev->device, queue_indices.compute, 0, &dev->compute_queue);
    vkGetDeviceQueue(dev->device, queue_indices.present, 0, &dev->present_queue);
    vkGetDeviceQueue(dev->device, queue_indices.transfer, 0, &dev->transfer_queue);

    return RT_SUCCESS;
}

static rt_result CreatePerFrameObjects(rt_vk_device *dev) {
    for (unsigned int i = 0; i < dev->max_frames_in_flight; ++i) {
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };
        if (vkCreateSemaphore(dev->device,
                              &semaphore_info,
                              dev->alloc_cb,
                              &dev->frames[i].render_finished) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
        if (vkCreateSemaphore(dev->device,
                              &semaphore_info,
                              dev->alloc_cb,
                              &dev->frames[i].image_available) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
        if (vkCreateSemaphore(dev->device,
                              &semaphore_info,
                              dev->alloc_cb,
                              &dev->frames[i].swapchain_transitioned) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
#ifdef RT_DEBUG
        char name[128];
        rtSPrint(name, 128, "Render Finished Semaphore (%u)", i);
        VkDebugUtilsObjectNameInfoEXT name_info = {
            .sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
            .objectHandle = (uint64_t)dev->frames[i].render_finished,
            .objectType   = VK_OBJECT_TYPE_SEMAPHORE,
            .pObjectName  = name,
        };
        vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);

        rtSPrint(name, 128, "Image Available Semaphore (%u)", i);
        name_info.objectHandle = (uint64_t)dev->frames[i].image_available;
        vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);

        rtSPrint(name, 128, "Swapchain Transitioned Semaphore (%u)", i);
        name_info.objectHandle = (uint64_t)dev->frames[i].swapchain_transitioned;
        vkSetDebugUtilsObjectNameEXT(dev->device, &name_info);
#endif

        dev->frames[i].command_pools = rtReserveCommandPoolArray(r_VkMaxThreads.ui);
    }
    return RT_SUCCESS;
}

void DestroyPerFrameObjects(rt_vk_device *dev) {
    for (unsigned int i = 0; i < dev->max_frames_in_flight; ++i) {
        vkDestroySemaphore(dev->device, dev->frames[i].image_available, dev->alloc_cb);
        vkDestroySemaphore(dev->device, dev->frames[i].render_finished, dev->alloc_cb);
        vkDestroySemaphore(dev->device, dev->frames[i].swapchain_transitioned, dev->alloc_cb);
    }
}

extern rt_result CreateSwapchain(rt_vk_device *dev);
extern rt_result RecreateSwapchain(rt_vk_device *dev);
extern void DestroySwapchain(rt_vk_device *dev);

rt_create_vk_device_result rtCreateVkDevice(const rt_renderer_window_info *info) {
    rtLog("VK", "Init");

    rt_timestamp initBegin = rtTimeNow();
    rt_vk_device dev;

    _tracking_alloc_cbs.pUserData       = NULL;
    _tracking_alloc_cbs.pfnAllocation   = TrackAllocation;
    _tracking_alloc_cbs.pfnReallocation = TrackReallocation;
    _tracking_alloc_cbs.pfnFree         = TrackFree;

    if (r_VkEnableAPIAllocTracking.i) {
        dev.alloc_cb = &_tracking_alloc_cbs;
    } else {
        dev.alloc_cb = NULL;
    }

    dev.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i, 2, 3);
    rtLog("VK", "Max frames in flight: %u", dev.max_frames_in_flight);
    if ((int)dev.max_frames_in_flight != r_VkMaxFramesInFlight.i) {
        r_VkMaxFramesInFlight.i = (int)dev.max_frames_in_flight;
        rtNotifyCVARChange(&r_VkMaxFramesInFlight);
    }

    rt_result res;
    if ((res = CreateInstance(&dev)) != RT_SUCCESS)
        goto out;
    if ((res = CreateSurface(info, &dev)) != RT_SUCCESS)
        goto out;
    if ((res = ChoosePhysicalDevice(&dev)) != RT_SUCCESS)
        goto out;
    if ((res = CreateDevice(&dev)) != RT_SUCCESS)
        goto out;
    if ((res = CreatePerFrameObjects(&dev)) != RT_SUCCESS)
        goto out;
    if ((res = CreateSwapchain(&dev)) != RT_SUCCESS)
        goto out;
    rt_create_vk_physical_resource_manager_result phys_res_mgr_res = rtCreateVkPhysicalResourceManager(&dev);
    if ((res = phys_res_mgr_res.result) != RT_SUCCESS)
        goto out;
    dev.phys_res_mgr = phys_res_mgr_res.phys_res_mgr;
    rt_create_vk_bindless_registry_result bindless_registry_result = rtCreateVkBindlessRegistry(&dev);
    if ((res = bindless_registry_result.result) != RT_SUCCESS)
        goto out;
    dev.bindless_registry = bindless_registry_result.bindless_registry;
    rt_create_vk_semaphore_pool_result semaphore_pool_result = rtCreateVkSemaphorePool(&dev, 128);
    if ((res = semaphore_pool_result.result) != RT_SUCCESS)
        goto out;
    dev.semaphore_pool = semaphore_pool_result.semaphore_pool;

    dev.created_pools      = calloc(128 * 9, sizeof(VkCommandPool));
    dev.created_pool_count = 0;
    dev.max_created_pools  = 128 * 9;

    rt_time_delta initTime = rtTimeBetween(initBegin, rtTimeNow());
    rtLog("VK", "Init complete. Took %lf seconds.", initTime);

out:
    return (rt_create_vk_device_result){.result = res, .device = dev};
}

void rtDestroyVkDevice(rt_vk_device *dev) {
    rtLog("VK", "Shutdown");
    vkDeviceWaitIdle(dev->device);
    free(dev->created_pools);
    rtDestroyVkBindlessRegistry(&dev->bindless_registry);
    rtDestroyVkPhysicalResourceManager(&dev->phys_res_mgr);
    DestroySwapchain(dev);
    DestroyPerFrameObjects(dev);
    vkDestroyDevice(dev->device, dev->alloc_cb);
    vkDestroySurfaceKHR(dev->instance, dev->surface, dev->alloc_cb);
#ifdef RT_DEBUG
    vkDestroyDebugUtilsMessengerEXT(dev->instance, dev->messenger, dev->alloc_cb);
#endif
    vkDestroyInstance(dev->instance, dev->alloc_cb);
}

/* rt_render_device_i functions */
rt_physical_resource_manager_i rtVkDevGetPhysicalResourceManager(void *o) {
    rt_vk_device *dev = o;
    rt_physical_resource_manager_i iface = {
        .o               = &dev->phys_res_mgr,
        .IsPresent       = rtVkPhysicalResourceManagerIsPresent,
        .Destroy         = rtVkPhysicalResourceManagerDestroy,
        .CreateBuffer    = rtVkPhysicalResourceManagerCreateBuffer,
        .CreateTexture2D = rtVkPhysicalResourceManagerCreateTexture2D,
    };
    return iface;
}

/* Commands */
static rt_result DrawIndirectImpl(rt_vk_device *dev, VkCommandBuffer cmdbuf, const void *_data) {
    const rt_draw_indirect_data *data = _data;
    const rt_vk_physical_resource *buffer_resource = rtGetVkPhysicalResource(&dev->phys_res_mgr, data->buffer);
    if (!buffer_resource) {
        rtLog("VK", "Invalid buffer in draw indirect data");
        return RT_INVALID_VALUE;
    }
    RT_ASSERT(buffer_resource->type == RT_RENDER_RESOURCE_TYPE_BUFFER,
              "Buffer handle in draw indirect data does not reference a buffer object");
    vkCmdDrawIndirect(cmdbuf, buffer_resource->buffer, data->offset, data->draw_count, data->stride);
    return RT_SUCCESS;
}

typedef rt_result rt_vk_render_command_impl_fn(rt_vk_device *dev, VkCommandBuffer cmdbuf, const void *data);

static size_t _command_data_sizes[RT_RENDER_COMMAND_COUNT] = {
    sizeof(rt_draw_indirect_data),
};

static rt_vk_render_command_impl_fn *_command_impls[RT_RENDER_COMMAND_COUNT] = {
    DrawIndirectImpl,
};

rt_result rtVkDevSubmitCommandList(void *o, const rt_render_command_list *list) {
    rt_vk_device *dev = o;
    size_t data_off   = 0;
    rt_result res     = RT_SUCCESS;

    /* Prepare command buffer */
    VkQueue queue          = VK_NULL_HANDLE;
    VkCommandBuffer cmdbuf = VK_NULL_HANDLE;
    if (list->target_queue == RT_RENDER_QUEUE_GRAPHICS) {
        cmdbuf = rtAllocateGraphicsCommandBuffer(dev);
        queue  = dev->graphics_queue;
    } else if (list->target_queue == RT_RENDER_QUEUE_COMPUTE) {
        cmdbuf = rtAllocateComputeCommandBuffer(dev);
        queue  = dev->compute_queue;
    } else if (list->target_queue == RT_RENDER_QUEUE_TRANSFER) {
        cmdbuf = rtAllocateTransferCommandBuffer(dev);
        queue  = dev->transfer_queue;
    } else {
        RT_ASSERT(false, "Invalid target queue");
        return RT_INVALID_VALUE;
    }
    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    if (vkBeginCommandBuffer(cmdbuf, &begin_info) != VK_SUCCESS) {
        rtLog("VK", "vkBeginCommandBuffer failed");
        return RT_UNKNOWN_ERROR;
    }

    /* Iterate over all commands and decode them */
    for (uint32_t i = 0; i < list->length; ++i) {
        rt_render_command_header header = list->headers[i];
        RT_ASSERT(header.type < RT_RENDER_COMMAND_COUNT,
                  "Invalid render command type.");
        const void *data = (const char *)list->data + data_off;
        res = _command_impls[header.type](dev, cmdbuf, data);
        if (res != RT_SUCCESS)
            break;
        data_off += _command_data_sizes[header.type];
    }
    if (res != RT_SUCCESS) {
        return res;
    }

    /* End the command buffer and create the submit info */
    vkEndCommandBuffer(cmdbuf);

    /* Submit it! */
    VkCommandBufferSubmitInfo cmdbuf_info = {
        .sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = cmdbuf,
    };

    VkSubmitInfo2 submit_info = {
        .sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .commandBufferInfoCount   = 1,
        .pCommandBufferInfos      = &cmdbuf_info,
        .waitSemaphoreInfoCount   = 0,
        .pWaitSemaphoreInfos      = NULL,
        .signalSemaphoreInfoCount = 0,
        .pSignalSemaphoreInfos    = NULL,
    };
    if (vkQueueSubmit2(queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
        rtLog("VK", "vkQueueSubmit2 failed");
        res = RT_UNKNOWN_ERROR;
    }
    return res;
}
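rtVkDevSubmitCommandList decodes a tightly packed stream of headers plus per-command payloads, advancing through list->data by the registered payload size of each command type. A hedged sketch of how a caller might assemble such a list for a single indirect draw follows; the draw-indirect enumerator name and the `indirect_args_buffer` handle are assumptions, only the fields the decoder above actually reads are used.

/* Hedged sketch: building a one-command list for rtVkDevSubmitCommandList.
 * `dev` and `indirect_args_buffer` are assumed to exist; the numeric command
 * type 0 stands in for the draw-indirect enumerator, which this diff does not show. */
rt_draw_indirect_data draw = {
    .buffer     = indirect_args_buffer,
    .offset     = 0,
    .draw_count = 1,
    .stride     = sizeof(VkDrawIndirectCommand),
};
rt_render_command_header header = {.type = 0 /* assumed: draw-indirect command id */};
rt_render_command_list list = {
    .target_queue = RT_RENDER_QUEUE_GRAPHICS,
    .headers      = &header,
    .data         = &draw,
    .length       = 1,
};
rt_result r = rtVkDevSubmitCommandList(dev, &list);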
108
src/renderer/vk/device.h
Normal file
@ -0,0 +1,108 @@
#ifndef RT_VK_DEVICE_H
#define RT_VK_DEVICE_H

#include <volk/volk.h>
#include <runtime/runtime.h>
#include <renderer/renderer.h>
#include <renderer/backend_api.h>

#include "physical_resource_manager.h"
#include "bindless_registry.h"
#include "command_buffers.h"
#include "semaphores.h"

#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif

typedef struct {
#ifdef _WIN32
    struct HINSTANCE__ *hInstance;
    struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
    struct _XDisplay *display;
    unsigned long window;
#endif
} rt_vk_native_window;

typedef struct {
    VkSemaphore render_finished;
    VkSemaphore image_available;
    VkSemaphore swapchain_transitioned;

    rt_vk_command_pool_array command_pools;
} rt_vk_frame_data;

typedef struct rt_vk_device {
    /* *** Vulkan objects *** */
    VkInstance instance;
    VkDevice device;
    VkPhysicalDevice phys_device;
    VkAllocationCallbacks *alloc_cb;
    VkSurfaceKHR surface;

    /* *** Queues *** */
    uint32_t graphics_family;
    uint32_t compute_family;
    uint32_t transfer_family;
    uint32_t present_family;
    uint32_t unique_families[4];
    uint32_t unique_family_count;
    VkQueue graphics_queue;
    VkQueue compute_queue;
    VkQueue transfer_queue;
    VkQueue present_queue;

    /* *** Properties and features *** */
    VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props;
    VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features;
    VkPhysicalDeviceFeatures phys_device_features;
    VkPhysicalDeviceProperties phys_device_props;

    /* *** Per frame data *** */
    uint32_t max_frames_in_flight;
    rt_vk_frame_data frames[3];
    uint32_t current_frame_id;

    /* *** Windowing system *** */
    rt_vk_native_window native_window;
    VkSwapchainKHR swapchain;
    VkImage swapchain_images[4];
    VkImageView swapchain_image_views[4];
    uint32_t swapchain_image_count;

    /* *** Subsystems *** */
    rt_vk_physical_resource_manager phys_res_mgr;
    rt_vk_bindless_registry bindless_registry;
    rt_vk_semaphore_pool semaphore_pool;

    /* *** Save created command pools in a list to clean them up at exit *** */
    VkCommandPool *created_pools;
    long created_pool_count;
    long max_created_pools;

    /* *** Debug utils *** */
#ifdef RT_DEBUG
    VkDebugUtilsMessengerEXT messenger;
#endif
} rt_vk_device;

typedef struct {
    rt_result result;
    rt_vk_device device;
} rt_create_vk_device_result;

rt_create_vk_device_result rtCreateVkDevice(const rt_renderer_window_info *info);

void rtDestroyVkDevice(rt_vk_device *dev);

/* rt_render_device_i functions */
rt_physical_resource_manager_i rtVkDevGetPhysicalResourceManager(void *o);
rt_result rtVkDevSubmitCommandList(void *o, const rt_render_command_list *list);

#endif
71
src/renderer/vk/init.c
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#include <renderer/backend_api.h>
|
||||||
|
#include <runtime/runtime.h>
|
||||||
|
#include <runtime/config.h>
|
||||||
|
|
||||||
|
#include "device.h"
|
||||||
|
|
||||||
|
extern rt_cvar r_VkEnableAPIAllocTracking;
|
||||||
|
extern rt_cvar r_VkPhysDeviceName;
|
||||||
|
extern rt_cvar r_VkMaxFramesInFlight;
|
||||||
|
extern rt_cvar r_VkMaxThreads;
|
||||||
|
extern rt_cvar r_VkPreferMailboxMode;
|
||||||
|
extern rt_cvar r_VkPreferRelaxedMode;
|
||||||
|
extern rt_cvar r_VkEnableVSync;
|
||||||
|
extern rt_cvar r_VkMaxResources;
|
||||||
|
extern rt_cvar r_VkBindlessUniformBufferDescriptors;
|
||||||
|
extern rt_cvar r_VkBindlessStorageBufferDescriptors;
|
||||||
|
extern rt_cvar r_VkBindlessSampledImageDescriptors;
|
||||||
|
extern rt_cvar r_VkBindlessStorageImageDescriptors;
|
||||||
|
extern rt_cvar r_VkBindlessSamplerDescriptors;
|
||||||
|
|
||||||
|
void VkRegisterCVARs(void) {
|
||||||
|
rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
|
||||||
|
rtRegisterCVAR(&r_VkPhysDeviceName);
|
||||||
|
rtRegisterCVAR(&r_VkMaxFramesInFlight);
|
||||||
|
rtRegisterCVAR(&r_VkMaxThreads);
|
||||||
|
rtRegisterCVAR(&r_VkPreferMailboxMode);
|
||||||
|
rtRegisterCVAR(&r_VkPreferRelaxedMode);
|
||||||
|
rtRegisterCVAR(&r_VkEnableVSync);
|
||||||
|
rtRegisterCVAR(&r_VkMaxResources);
|
||||||
|
rtRegisterCVAR(&r_VkBindlessUniformBufferDescriptors);
|
||||||
|
rtRegisterCVAR(&r_VkBindlessStorageBufferDescriptors);
|
||||||
|
rtRegisterCVAR(&r_VkBindlessSampledImageDescriptors);
|
||||||
|
rtRegisterCVAR(&r_VkBindlessStorageImageDescriptors);
|
||||||
|
rtRegisterCVAR(&r_VkBindlessSamplerDescriptors);
|
||||||
|
}
|
||||||
|
|
||||||
|
static rt_vk_device _device;
|
||||||
|
|
||||||
|
rt_render_backend_init_result VkInit(const rt_renderer_window_info *info) {
|
||||||
|
rt_render_backend_init_result res = {.result = RT_SUCCESS};
|
||||||
|
|
||||||
|
rt_create_vk_device_result device_result = rtCreateVkDevice(info);
|
||||||
|
if (device_result.result != RT_SUCCESS) {
|
||||||
|
res.result = device_result.result;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
/* Populate the device interface */
|
||||||
|
_device = device_result.device;
|
||||||
|
rt_render_device_i device_i = {
|
||||||
|
.o = &_device,
|
||||||
|
.GetPhysicalResourceManager = rtVkDevGetPhysicalResourceManager,
|
||||||
|
.SubmitCommandList = rtVkDevSubmitCommandList,
|
||||||
|
};
|
||||||
|
res.device = device_i;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VkShutdown(void) {
|
||||||
|
rtDestroyVkDevice(&_device);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Called by the application to retrieve the renderer api
|
||||||
|
RT_DLLEXPORT rt_render_backend_api rtLoadRenderBackendImpl(void) {
|
||||||
|
rt_render_backend_api api = {
|
||||||
|
.RegisterCVARs = VkRegisterCVARs,
|
||||||
|
.Init = VkInit,
|
||||||
|
.Shutdown = VkShutdown,
|
||||||
|
};
|
||||||
|
return api;
|
||||||
|
}
|
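Illustrative only, not part of this changeset: a minimal sketch of the call sequence the comment above implies on the application side. The window_info variable is a placeholder, and the step of resolving rtLoadRenderBackendImpl from the loaded backend library is omitted.

/* Sketch: driving the backend through the rt_render_backend_api table. */
rt_render_backend_api api = rtLoadRenderBackendImpl(); /* normally resolved from the backend DLL/SO */
api.RegisterCVARs();
rt_render_backend_init_result init_res = api.Init(&window_info); /* window_info: a filled rt_renderer_window_info (assumed) */
if (init_res.result == RT_SUCCESS) {
    rt_render_device_i *device = &init_res.device;
    /* ... record and submit work via device->SubmitCommandList(device->o, list) ... */
    api.Shutdown();
}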
45  src/renderer/vk/meson.build  Normal file
@@ -0,0 +1,45 @@
if get_option('build_vk')
    vma_proj = subproject('vulkan-memory-allocator', default_options: ['warning_level=0', 'werror=false'])
    vma_dep = vma_proj.get_variable('vma_allocator_dep')

    vk_inc_proj = subproject('vulkan-headers')
    vk_inc_dep = vk_inc_proj.get_variable('vulkan_headers_dep')

    platform_defs = []
    if get_option('use_xlib')
        platform_defs = ['-DVK_USE_PLATFORM_XLIB_KHR']
    elif host_machine.system() == 'windows'
        platform_defs = ['-DVK_USE_PLATFORM_WIN32_KHR']
    endif

    vk_renderer_lib = library('rtvk',
        'bindless_registry.h',
        'command_buffers.h',
        'device.h',
        'physical_resource_manager.h',
        'semaphores.h',
        'utils.h',

        'bindless_registry.c',
        'command_buffers.c',
        'device.c',
        'init.c',
        'physical_resource_manager.c',
        'semaphores.c',
        'swapchain.c',
        'utils.c',
        'vma_impl.cpp',

        '../../../contrib/volk/volk.c',
        '../../../contrib/volk/volk.h',
        dependencies: [m_dep, vk_inc_dep, vma_dep, thread_dep],
        include_directories: [engine_incdir, contrib_incdir],
        link_with: [runtime_lib],
        c_pch: 'pch/vk_pch.h',
        c_args: platform_defs,
        install: true)

    engine_libs += vk_renderer_lib
    engine_lib_paths += vk_renderer_lib.full_path()
endif
3  src/renderer/vk/pch/vk_pch.h  Normal file
@@ -0,0 +1,3 @@
#include <volk/volk.h>

#include <runtime/runtime.h>
330  src/renderer/vk/physical_resource_manager.c  Normal file
@@ -0,0 +1,330 @@
#include <runtime/config.h>
#include <stdlib.h>

#include "device.h"
#include "physical_resource_manager.h"

#include <unistd.h>

RT_CVAR_SZ(r_VkMaxResources,
           "Maximum number of simultaneously existing resources. (Default: 4096)",
           4096);

static VmaAllocator CreateAllocator(rt_vk_device *dev) {
#define SET_FNC(name) fncs.name = name
#define SET_KHR_FNC(name) (fncs).name##KHR = name
    VmaVulkanFunctions fncs = {NULL};
    SET_FNC(vkGetInstanceProcAddr);
    SET_FNC(vkGetDeviceProcAddr);
    SET_FNC(vkGetPhysicalDeviceProperties);
    SET_FNC(vkGetPhysicalDeviceMemoryProperties);
    SET_FNC(vkAllocateMemory);
    SET_FNC(vkFreeMemory);
    SET_FNC(vkMapMemory);
    SET_FNC(vkUnmapMemory);
    SET_FNC(vkFlushMappedMemoryRanges);
    SET_FNC(vkInvalidateMappedMemoryRanges);
    SET_FNC(vkBindBufferMemory);
    SET_FNC(vkBindImageMemory);
    SET_FNC(vkGetBufferMemoryRequirements);
    SET_FNC(vkGetImageMemoryRequirements);
    SET_FNC(vkCreateBuffer);
    SET_FNC(vkDestroyBuffer);
    SET_FNC(vkCreateImage);
    SET_FNC(vkDestroyImage);
    SET_FNC(vkCmdCopyBuffer);
    SET_KHR_FNC(vkGetBufferMemoryRequirements2);
    SET_KHR_FNC(vkGetImageMemoryRequirements2);
    SET_KHR_FNC(vkBindBufferMemory2);
    SET_KHR_FNC(vkBindImageMemory2);
    SET_KHR_FNC(vkGetPhysicalDeviceMemoryProperties2);
    SET_FNC(vkGetDeviceBufferMemoryRequirements);
    SET_FNC(vkGetDeviceImageMemoryRequirements);
#undef SET_FNC
#undef SET_KHR_FNC

    VmaAllocatorCreateInfo allocator_info = {
        .instance = dev->instance,
        .physicalDevice = dev->phys_device,
        .device = dev->device,
        .pAllocationCallbacks = dev->alloc_cb,
        .vulkanApiVersion = VK_API_VERSION_1_3,
        .pVulkanFunctions = &fncs,
    };

    VmaAllocator allocator;
    if (vmaCreateAllocator(&allocator_info, &allocator) != VK_SUCCESS)
        return NULL;
    return allocator;
}

rt_create_vk_physical_resource_manager_result rtCreateVkPhysicalResourceManager(struct rt_vk_device *dev) {
    rt_vk_physical_resource_manager phys_res_mgr;
    phys_res_mgr.dev = dev;
    phys_res_mgr.allocator = CreateAllocator(dev);
    if (!phys_res_mgr.allocator) {
        return (rt_create_vk_physical_resource_manager_result){.result = RT_UNKNOWN_ERROR};
    }

    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok) {
        vmaDestroyAllocator(phys_res_mgr.allocator);
        return (rt_create_vk_physical_resource_manager_result){.result = RT_UNKNOWN_ERROR};
    }
    phys_res_mgr.lock = lock_res.lock;

    phys_res_mgr.resources = calloc(r_VkMaxResources.sz, sizeof(rt_vk_physical_resource));
    if (!phys_res_mgr.resources) {
        rtDestroyRWLock(&phys_res_mgr.lock);
        vmaDestroyAllocator(phys_res_mgr.allocator);
        return (rt_create_vk_physical_resource_manager_result){.result = RT_OUT_OF_MEMORY};
    }
    void *lut_mem = malloc(RT_HASH_TABLE_MEMORY_REQUIRED(2 * r_VkMaxResources.sz));
    if (!lut_mem) {
        free(phys_res_mgr.resources);
        rtDestroyRWLock(&phys_res_mgr.lock);
        vmaDestroyAllocator(phys_res_mgr.allocator);
        return (rt_create_vk_physical_resource_manager_result){.result = RT_OUT_OF_MEMORY};
    }
    phys_res_mgr.resource_lut = rtCreateHashtable(2 * r_VkMaxResources.sz, lut_mem, NULL, NULL);
    phys_res_mgr.lut_mem = lut_mem;

    phys_res_mgr.free_slots = calloc(r_VkMaxResources.sz, sizeof(uint32_t));
    if (!phys_res_mgr.free_slots) {
        free(phys_res_mgr.lut_mem);
        free(phys_res_mgr.resources);
        rtDestroyRWLock(&phys_res_mgr.lock);
        vmaDestroyAllocator(phys_res_mgr.allocator);
        return (rt_create_vk_physical_resource_manager_result){.result = RT_OUT_OF_MEMORY};
    }
    for (uint32_t i = 0; i < r_VkMaxResources.sz; ++i) {
        phys_res_mgr.free_slots[i] = i;
    }
    phys_res_mgr.free_slot_count = r_VkMaxResources.sz;

    /* TODO: Create pools for transient resources. */

    return (rt_create_vk_physical_resource_manager_result){.result = RT_SUCCESS,
                                                           .phys_res_mgr = phys_res_mgr};
}

void rtDestroyVkPhysicalResourceManager(rt_vk_physical_resource_manager *phys_res_mgr) {
    if (phys_res_mgr->free_slot_count < r_VkMaxResources.sz) {
        rtReportError("VK", "Destroyed the physical resource manager, but there are still resources left.");
    }
    free(phys_res_mgr->free_slots);
    free(phys_res_mgr->lut_mem);
    free(phys_res_mgr->resources);
    vmaDestroyAllocator(phys_res_mgr->allocator);
}

bool rtVkPhysicalResourceManagerIsPresent(void *o, rt_render_resource_handle handle) {
    rt_vk_physical_resource_manager *phys_res_mgr = o;
    rtLockRead(&phys_res_mgr->lock);
    bool is_present = rtHashtableLookup(&phys_res_mgr->resource_lut, (uint64_t)handle.value, UINT64_MAX) != UINT64_MAX;
    rtUnlockRead(&phys_res_mgr->lock);
    return is_present;
}

void rtVkPhysicalResourceManagerDestroy(void *o, rt_render_resource_handle handle) {
    rt_vk_physical_resource_manager *phys_res_mgr = o;
    rtLockWrite(&phys_res_mgr->lock);
    uint32_t slot = rtHashtableLookup(&phys_res_mgr->resource_lut, handle.value, UINT32_MAX);
    if (slot != UINT32_MAX) {
        switch (phys_res_mgr->resources[slot].type) {
        case RT_RENDER_RESOURCE_TYPE_BUFFER: {
            vmaDestroyBuffer(phys_res_mgr->allocator, phys_res_mgr->resources[slot].buffer, phys_res_mgr->resources[slot].allocation);
        } break;
        case RT_RENDER_RESOURCE_TYPE_TEXTURE2D: {
            RT_NOT_IMPLEMENTED;
        } break;
        default: {
            rtReportError("VK", "Tried to destroy a resource, but the type is not valid.");
        }
        }
        phys_res_mgr->free_slots[phys_res_mgr->free_slot_count++] = slot;
    }
    rtUnlockWrite(&phys_res_mgr->lock);
}

rt_vk_physical_resource *rtGetVkPhysicalResource(rt_vk_physical_resource_manager *phys_res_mgr, rt_render_resource_handle handle) {
    rt_vk_physical_resource *resource = NULL;
    rtLockRead(&phys_res_mgr->lock);
    uint32_t slot = rtHashtableLookup(&phys_res_mgr->resource_lut, handle.value, UINT32_MAX);
    if (slot != UINT32_MAX) {
        resource = &phys_res_mgr->resources[slot];
    }
    rtUnlockRead(&phys_res_mgr->lock);
    return resource;
}

/* Call this with a held write lock! */
static uint32_t AllocStorageSlot(rt_vk_physical_resource_manager *phys_res_mgr, rt_render_resource_handle h) {
    if (phys_res_mgr->free_slot_count > 0u) {
        uint32_t slot = phys_res_mgr->free_slots[--phys_res_mgr->free_slot_count];
        /* The hashtable is large enough that this should never fail */
        rt_result insert_res = rtHashtableInsert(&phys_res_mgr->resource_lut, (uint64_t)h.value, (uint64_t)slot);
        RT_UNUSED(insert_res);
        RT_VERIFY(insert_res == RT_SUCCESS);
        return slot;
    }
    return UINT32_MAX;
}

rt_result rtVkPhysicalResourceManagerCreateBuffer(void *o, rt_render_resource_handle h, const rt_render_buffer_desc *desc) {
    rt_vk_physical_resource_manager *phys_res_mgr = o;

    VkBufferUsageFlags usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    if ((desc->usage & RT_RENDER_BUFFER_USAGE_INDEX_BUFFER) != 0)
        usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
    if ((desc->usage & RT_RENDER_BUFFER_USAGE_VERTEX_BUFFER) != 0)
        usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
    if ((desc->usage & RT_RENDER_BUFFER_USAGE_STORAGE_BUFFER) != 0)
        usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
    if ((desc->usage & RT_RENDER_BUFFER_USAGE_UPLOAD_BUFFER) != 0) {
        usage &= ~VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    }

    VkBufferCreateInfo buffer_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = desc->size,
        .usage = usage,
        .sharingMode = VK_SHARING_MODE_CONCURRENT,
        .pQueueFamilyIndices = phys_res_mgr->dev->unique_families,
        .queueFamilyIndexCount = phys_res_mgr->dev->unique_family_count,
    };

    VmaAllocationCreateFlags alloc_flags = 0;
    if (desc->access == RT_RENDER_BUFFER_ACCESS_CPU_AND_GPU)
        alloc_flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;

    VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO,
        .flags = alloc_flags,
    };

    VkBuffer buffer;
    VmaAllocation allocation;
    if (vmaCreateBuffer(phys_res_mgr->allocator, &buffer_info, &alloc_info, &buffer, &allocation, NULL) != VK_SUCCESS) {
        rtLog("VK", "Failed to create buffer.");
        return RT_UNKNOWN_ERROR;
    }

    /* Store */
    rt_result res = RT_SUCCESS;
    rtLockWrite(&phys_res_mgr->lock);
    uint32_t slot = AllocStorageSlot(phys_res_mgr, h);
    if (slot != UINT32_MAX) {
        phys_res_mgr->resources[slot].type = RT_RENDER_RESOURCE_TYPE_BUFFER;
        phys_res_mgr->resources[slot].buffer = buffer;
        phys_res_mgr->resources[slot].allocation = allocation;
    } else {
        res = RT_OUT_OF_MEMORY;
        vmaDestroyBuffer(phys_res_mgr->allocator, buffer, allocation);
        rtLog("VK", "Could not create buffer because no storage space is available.");
    }
    rtUnlockWrite(&phys_res_mgr->lock);

    return res;
}

static VkFormat RtTextureFormatToVkFormat(rt_texture_format texture_format) {
    RT_ASSERT(texture_format < RT_TEXTURE_FORMAT_MAX, "Invalid format");
    VkFormat formats[RT_TEXTURE_FORMAT_MAX] = {
        VK_FORMAT_B8G8R8A8_SRGB, // RT_TEXTURE_FORMAT_B8G8R8A8_SRGB
    };
    return formats[texture_format];
}

static VkImageUsageFlagBits RtTextureUsageToVkImageUsage(rt_texture_usage_flags usage_flags) {
    VkImageUsageFlagBits usage = 0;
    if ((usage_flags & RT_TEXTURE_USAGE_COLOR_ATTACHMENT) != 0)
        usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    if ((usage_flags & RT_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT) != 0)
        usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
    if ((usage_flags & RT_TEXTURE_USAGE_SAMPLED_IMAGE) != 0)
        usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
    if ((usage_flags & RT_TEXTURE_USAGE_STORAGE_IMAGE) != 0)
        usage |= VK_IMAGE_USAGE_STORAGE_BIT;
    return usage;
}

rt_result rtVkPhysicalResourceManagerCreateTexture2D(void *o, rt_render_resource_handle h, const rt_render_texture2d_desc *desc) {
    rt_vk_physical_resource_manager *phys_res_mgr = o;

    VkFormat format = RtTextureFormatToVkFormat(desc->format);
    VkExtent3D extent = {.width = desc->width, .height = desc->height, .depth = 1};
    VkImageUsageFlagBits usage = RtTextureUsageToVkImageUsage(desc->usage);

    VkImageCreateInfo image_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .extent = extent,
        .format = format,
        .imageType = VK_IMAGE_TYPE_2D,
        .sharingMode = VK_SHARING_MODE_CONCURRENT,
        .queueFamilyIndexCount = phys_res_mgr->dev->unique_family_count,
        .pQueueFamilyIndices = &phys_res_mgr->dev->unique_families[0],
        .mipLevels = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .arrayLayers = 1,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .usage = usage,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
    };

    VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO,
    };

    VkImage image;
    VmaAllocation allocation;
    if (vmaCreateImage(phys_res_mgr->allocator, &image_info, &alloc_info, &image, &allocation, NULL) != VK_SUCCESS) {
        rtLog("VK", "Failed to create image.");
        return RT_UNKNOWN_ERROR;
    }

    VkImageViewCreateInfo view_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .image = image,
        .format = format,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .levelCount = 1,
            .baseMipLevel = 0,
            .layerCount = 1,
            .baseArrayLayer = 0
        },
        .components = {
            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
            .a = VK_COMPONENT_SWIZZLE_IDENTITY
        },
    };
    VkImageView view;
    if (vkCreateImageView(phys_res_mgr->dev->device, &view_info, phys_res_mgr->dev->alloc_cb, &view) != VK_SUCCESS) {
        vmaDestroyImage(phys_res_mgr->allocator, image, allocation);
        rtLog("VK", "Failed to create image view.");
        return RT_UNKNOWN_ERROR;
    }

    /* Store */
    rt_result res = RT_SUCCESS;
    rtLockWrite(&phys_res_mgr->lock);
    uint32_t slot = AllocStorageSlot(phys_res_mgr, h);
    if (slot != UINT32_MAX) {
        phys_res_mgr->resources[slot].type = RT_RENDER_RESOURCE_TYPE_TEXTURE2D;
        phys_res_mgr->resources[slot].texture2d.image = image;
        phys_res_mgr->resources[slot].texture2d.view = view;
        phys_res_mgr->resources[slot].allocation = allocation;
    } else {
        res = RT_OUT_OF_MEMORY;
        vkDestroyImageView(phys_res_mgr->dev->device, view, phys_res_mgr->dev->alloc_cb);
        vmaDestroyImage(phys_res_mgr->allocator, image, allocation);
        rtLog("VK", "Could not create image because no storage space is available.");
    }
    rtUnlockWrite(&phys_res_mgr->lock);

    return res;
}
63  src/renderer/vk/physical_resource_manager.h  Normal file
@@ -0,0 +1,63 @@
#ifndef RT_VK_PHYSICAL_RESOURCE_MANAGER_H
#define RT_VK_PHYSICAL_RESOURCE_MANAGER_H

#include <stdbool.h>
#include <runtime/runtime.h>
#include <runtime/ds.h>
#include <runtime/threading.h>
#include <renderer/backend_api.h>

#include <volk/volk.h>

#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vk_mem_alloc.h>

struct rt_vk_device;

typedef struct {
    rt_render_resource_type type;
    VmaAllocation allocation;

    union {
        VkBuffer buffer;
        struct {
            VkImage image;
            VkImageView view;
        } texture2d;
    };
} rt_vk_physical_resource;

typedef struct {
    struct rt_vk_device *dev;
    VmaAllocator allocator;

    /* Maps handles to slots inside resources */
    rt_hashtable resource_lut;
    rt_vk_physical_resource *resources;
    void *lut_mem;

    uint32_t *free_slots;
    uint32_t free_slot_count;

    rt_rwlock lock;
} rt_vk_physical_resource_manager;

typedef struct {
    rt_result result;
    rt_vk_physical_resource_manager phys_res_mgr;
} rt_create_vk_physical_resource_manager_result;

rt_create_vk_physical_resource_manager_result rtCreateVkPhysicalResourceManager(struct rt_vk_device *dev);

void rtDestroyVkPhysicalResourceManager(rt_vk_physical_resource_manager *phys_res_mgr);

rt_vk_physical_resource *rtGetVkPhysicalResource(rt_vk_physical_resource_manager *phys_res_mgr, rt_render_resource_handle handle);

/* rt_physical_resource_manager_i functions */
bool rtVkPhysicalResourceManagerIsPresent(void *o, rt_render_resource_handle handle);
void rtVkPhysicalResourceManagerDestroy(void *o, rt_render_resource_handle handle);
rt_result rtVkPhysicalResourceManagerCreateBuffer(void *o, rt_render_resource_handle h, const rt_render_buffer_desc *desc);
rt_result rtVkPhysicalResourceManagerCreateTexture2D(void *o, rt_render_resource_handle h, const rt_render_texture2d_desc *desc);

#endif
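Not part of the changeset: a minimal sketch of how a caller might drive the physical resource manager through the functions declared above. The device variable, the handle value, and the exact rt_render_buffer_desc field values are assumptions made for the example.

/* Sketch: create, look up, and destroy a buffer via the manager (assumed inputs). */
rt_create_vk_physical_resource_manager_result mgr_res = rtCreateVkPhysicalResourceManager(&device);
if (mgr_res.result == RT_SUCCESS) {
    rt_vk_physical_resource_manager mgr = mgr_res.phys_res_mgr;
    rt_render_resource_handle handle = {.value = 1}; /* hypothetical handle */
    rt_render_buffer_desc desc = {
        .size = 64 * 1024,
        .usage = RT_RENDER_BUFFER_USAGE_VERTEX_BUFFER,
        .access = RT_RENDER_BUFFER_ACCESS_CPU_AND_GPU,
    };
    if (rtVkPhysicalResourceManagerCreateBuffer(&mgr, handle, &desc) == RT_SUCCESS) {
        rt_vk_physical_resource *res = rtGetVkPhysicalResource(&mgr, handle);
        /* ... record res->buffer into a command list ... */
        rtVkPhysicalResourceManagerDestroy(&mgr, handle);
    }
    rtDestroyVkPhysicalResourceManager(&mgr);
}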
115  src/renderer/vk/semaphores.c  Normal file
@@ -0,0 +1,115 @@
#include "semaphores.h"
#include "device.h"
#include "utils.h"

#include <runtime/atomics.h>
#include <stdlib.h>

rt_create_vk_semaphore_pool_result rtCreateVkSemaphorePool(rt_vk_device *dev, uint32_t initial_size) {
    rt_vk_semaphore_pool sem_pool;
    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok) {
        return (rt_create_vk_semaphore_pool_result){.result = RT_UNKNOWN_ERROR};
    }
    sem_pool.resize_lock = lock_res.lock;
    sem_pool.dev = dev;
    sem_pool.acquire_index = 0;
    sem_pool.size = initial_size;
    sem_pool.semaphores = calloc(initial_size, sizeof(rt_vk_semaphore));
    if (!sem_pool.semaphores)
        return (rt_create_vk_semaphore_pool_result){.result = RT_OUT_OF_MEMORY};

    for (uint32_t i = 0; i < initial_size; ++i) {
        VkSemaphoreTypeCreateInfo type_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue = 0,
        };
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .flags = 0,
            .pNext = &type_info
        };

        VkResult res = vkCreateSemaphore(dev->device, &semaphore_info, dev->alloc_cb, &sem_pool.semaphores[i].semaphore);
        if (res != VK_SUCCESS) {
            for (uint32_t j = 0; j < i; ++j) {
                vkDestroySemaphore(dev->device, sem_pool.semaphores[j].semaphore, dev->alloc_cb);
            }
            free(sem_pool.semaphores);
            return (rt_create_vk_semaphore_pool_result){.result = rtVkResultToRTResult(res)};
        }
        sem_pool.semaphores[i].value = 0;
        sem_pool.semaphores[i].in_use = RT_FALSE;
    }

    return (rt_create_vk_semaphore_pool_result){
        .result = RT_SUCCESS,
        .semaphore_pool = sem_pool,
    };
}

void rtDestroyVkSemaphorePool(rt_vk_semaphore_pool *pool) {
    for (uint32_t i = 0; i < pool->size; ++i) {
        vkDestroySemaphore(pool->dev->device, pool->semaphores[i].semaphore, pool->dev->alloc_cb);
    }
    free(pool->semaphores);
}

rt_vk_semaphore *rtAcquireSemaphore(rt_vk_semaphore_pool *pool) {
    rtLockRead(&pool->resize_lock);
    uint32_t index = rtAtomic32Inc(&pool->acquire_index) % pool->size;
    if (rtAtomic32CAS(&pool->semaphores[index].in_use, RT_TRUE, RT_FALSE) == RT_FALSE) {
        /* Successfully acquired the semaphore */
        rtUnlockRead(&pool->resize_lock);
        return &pool->semaphores[index];
    }

    /* We need to resize the pool */
    uint32_t pre_resize_size = pool->size;
    rtUnlockRead(&pool->resize_lock);
    rtLockWrite(&pool->resize_lock);
    if (pool->size > pre_resize_size) {
        /* Someone else was faster. Just try again */
        rtUnlockWrite(&pool->resize_lock);
        return rtAcquireSemaphore(pool);
    }
    uint32_t new_size = pre_resize_size * 2;
    rt_vk_semaphore *tmp = realloc(pool->semaphores, sizeof(rt_vk_semaphore) * new_size);
    if (!tmp) {
        rtUnlockWrite(&pool->resize_lock);
        rtReportError("VK", "Tried to grow the semaphore pool, but ran out of memory.");
        return NULL;
    }
    pool->semaphores = tmp;
    /* Create new semaphores */
    for (uint32_t i = pre_resize_size; i < new_size; ++i) {
        VkSemaphoreTypeCreateInfo type_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue = 0,
        };
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .flags = 0,
            .pNext = &type_info
        };

        VkResult res = vkCreateSemaphore(pool->dev->device, &semaphore_info, pool->dev->alloc_cb, &pool->semaphores[i].semaphore);
        if (res != VK_SUCCESS) {
            for (uint32_t j = pre_resize_size; j < i; ++j) {
                vkDestroySemaphore(pool->dev->device, pool->semaphores[j].semaphore, pool->dev->alloc_cb);
            }
            rtReportError("VK", "Tried to grow the semaphore pool, but failed to create new semaphores");
        }
        pool->semaphores[i].value = 0;
        pool->semaphores[i].in_use = RT_FALSE;
    }
    pool->size = new_size;
    rtUnlockWrite(&pool->resize_lock);
    return rtAcquireSemaphore(pool);
}

void rtReleaseSemaphore(rt_vk_semaphore *semaphore) {
    rtAtomic32Exchange(&semaphore->in_use, RT_FALSE);
}
47  src/renderer/vk/semaphores.h  Normal file
@@ -0,0 +1,47 @@
#ifndef RT_VK_SEMAPHORES_H
#define RT_VK_SEMAPHORES_H

#include <volk/volk.h>
#include <runtime/threading.h>

struct rt_vk_device;

typedef struct {
    /* A timeline semaphore */
    VkSemaphore semaphore;

    /* Its current value, or the next value it will signal */
    uint64_t value;

    rt_bool32 in_use;
} rt_vk_semaphore;

typedef struct {
    struct rt_vk_device *dev;

    rt_vk_semaphore *semaphores;

    /* Number of semaphores inside the pool */
    uint32_t size;

    /* Running index of acquire operations. Atomically incremented to fetch a "new" semaphore */
    uint32_t acquire_index;

    /* Locked as writing when resizing the array. During normal usage, this gets a read lock */
    rt_rwlock resize_lock;
} rt_vk_semaphore_pool;

typedef struct {
    rt_result result;
    rt_vk_semaphore_pool semaphore_pool;
} rt_create_vk_semaphore_pool_result;

rt_create_vk_semaphore_pool_result rtCreateVkSemaphorePool(struct rt_vk_device *device, uint32_t initial_size);

void rtDestroyVkSemaphorePool(rt_vk_semaphore_pool *pool);

rt_vk_semaphore *rtAcquireSemaphore(rt_vk_semaphore_pool *pool);

void rtReleaseSemaphore(rt_vk_semaphore *semaphore);

#endif
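Not part of the changeset: a small sketch of how a pooled timeline semaphore might be signalled on a queue submission, assuming the device was created with timeline semaphores and the synchronization2 feature enabled. The device variable is a placeholder for an initialized rt_vk_device.

/* Sketch: acquire a timeline semaphore, signal its next value on submit, release it. */
rt_vk_semaphore *sem = rtAcquireSemaphore(&device.semaphore_pool);
if (sem) {
    VkSemaphoreSubmitInfo signal_info = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = sem->semaphore,
        .value = ++sem->value, /* next value this semaphore will signal */
        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
    };
    VkSubmitInfo2 submit = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .signalSemaphoreInfoCount = 1,
        .pSignalSemaphoreInfos = &signal_info,
    };
    vkQueueSubmit2(device.graphics_queue, 1, &submit, VK_NULL_HANDLE);
    /* ... later, wait until sem->semaphore reaches sem->value, then: */
    rtReleaseSemaphore(sem);
}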
176  src/renderer/vk/swapchain.c  Normal file
@@ -0,0 +1,176 @@
#include <runtime/mem_arena.h>
#include <runtime/config.h>

#include "device.h"

RT_CVAR_I(r_VkPreferMailboxMode, "Prefer the mailbox present mode. [0/1] (Default: 1)", 1);
RT_CVAR_I(r_VkPreferRelaxedMode, "Prefer the relaxed FIFO present mode. [0/1] (Default: 0)", 0);
RT_CVAR_I(r_VkEnableVSync, "Enable VSync. [0/1] (Default: 0)", 0);

rt_result CreateSwapchain(rt_vk_device *dev) {
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    uint32_t format_count = 0;
    if (vkGetPhysicalDeviceSurfaceFormatsKHR(dev->phys_device, dev->surface, &format_count, NULL) != VK_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }
    VkSurfaceFormatKHR *formats = RT_ARENA_PUSH_ARRAY(temp.arena, VkSurfaceFormatKHR, format_count);
    if (vkGetPhysicalDeviceSurfaceFormatsKHR(dev->phys_device, dev->surface, &format_count, formats) != VK_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    /* Pick a format */
    VkFormat surface_format = formats[0].format;
    VkColorSpaceKHR color_space = formats[0].colorSpace;
    for (uint32_t i = 0; i < format_count; ++i) {
        if (formats[i].format == VK_FORMAT_B8G8R8A8_SRGB && formats[i].colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
            surface_format = formats[i].format;
            color_space = formats[i].colorSpace;
            break;
        }
    }

    /* Determine the number of images to create */
    uint32_t num_images = dev->max_frames_in_flight + 1;
    VkSurfaceCapabilitiesKHR surface_capabilities;
    vkGetPhysicalDeviceSurfaceCapabilitiesKHR(dev->phys_device, dev->surface, &surface_capabilities);
    if (surface_capabilities.maxImageCount > 0 && surface_capabilities.maxImageCount < dev->max_frames_in_flight) {
        dev->max_frames_in_flight = surface_capabilities.maxImageCount;
        num_images = dev->max_frames_in_flight;
        rtLog("VK", "Limiting number of frames in flight to maximum number of swapchain images %u.", dev->max_frames_in_flight);
    }
    if (surface_capabilities.maxImageCount > 0 && num_images > surface_capabilities.maxImageCount) {
        num_images = surface_capabilities.maxImageCount;
    }

    /* Determine the extent */
    VkExtent2D extent = surface_capabilities.currentExtent;
    if (surface_capabilities.currentExtent.width == UINT32_MAX && surface_capabilities.currentExtent.height == UINT32_MAX) {
        extent = surface_capabilities.maxImageExtent;
    }

    /* Determine the present mode */
    uint32_t present_mode_count = 0;
    if (vkGetPhysicalDeviceSurfacePresentModesKHR(dev->phys_device, dev->surface, &present_mode_count, NULL) != VK_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }
    VkPresentModeKHR *present_modes = RT_ARENA_PUSH_ARRAY(temp.arena, VkPresentModeKHR, present_mode_count);
    if (vkGetPhysicalDeviceSurfacePresentModesKHR(dev->phys_device, dev->surface, &present_mode_count, present_modes) != VK_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    VkPresentModeKHR present_mode = present_modes[0];
    if (!r_VkEnableVSync.i) {
        if (r_VkPreferMailboxMode.i) {
            rtLog("VK", "r_VkPreferMailboxMode has no effect if VSync is disabled.");
            r_VkPreferMailboxMode.i = 0;
            rtNotifyCVARChange(&r_VkPreferMailboxMode);
        }
        if (r_VkPreferRelaxedMode.i) {
            rtLog("VK", "r_VkPreferRelaxedMode has no effect if VSync is disabled.");
            r_VkPreferRelaxedMode.i = 0;
            rtNotifyCVARChange(&r_VkPreferRelaxedMode);
        }

        for (uint32_t i = 0; i < present_mode_count; ++i) {
            if (present_modes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) {
                present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR;
                break;
            }
        }

        if (present_mode != VK_PRESENT_MODE_IMMEDIATE_KHR) {
            rtLog("VK", "VSync was disabled, but the physical device/surface combination does not support VK_PRESENT_MODE_IMMEDIATE_KHR");
            r_VkEnableVSync.i = 1;
            rtNotifyCVARChange(&r_VkEnableVSync);
        }
    }
    /* NOT an else if, because we fall back to this if disabled vsync is not supported. */
    if (r_VkEnableVSync.i) {
        /* Required to be supported */
        present_mode = VK_PRESENT_MODE_FIFO_KHR;
        if (r_VkPreferMailboxMode.i || r_VkPreferRelaxedMode.i) {
            for (uint32_t i = 0; i < present_mode_count; ++i) {
                if (present_modes[i] == VK_PRESENT_MODE_FIFO_RELAXED_KHR && r_VkPreferRelaxedMode.i) {
                    present_mode = present_modes[i];
                    break;
                }
                if (present_modes[i] == VK_PRESENT_MODE_MAILBOX_KHR && r_VkPreferMailboxMode.i) {
                    present_mode = present_modes[i];
                    break;
                }
            }
        }
    }
    VkSwapchainCreateInfoKHR swapchain_info = {
        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
        .pNext = NULL,
        .flags = 0,
        .surface = dev->surface,
        .minImageCount = num_images,
        .imageFormat = surface_format,
        .imageColorSpace = color_space,
        .imageExtent = extent,
        .imageArrayLayers = 1,
        .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
        .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .pQueueFamilyIndices = NULL,
        .queueFamilyIndexCount = 0,
        .preTransform = surface_capabilities.currentTransform,
        .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
        .presentMode = present_mode,
        .clipped = VK_TRUE,
        .oldSwapchain = VK_NULL_HANDLE,
    };

    /* Keep the index array alive until vkCreateSwapchainKHR is called. */
    uint32_t queue_families[2];
    if (dev->graphics_family != dev->present_family) {
        queue_families[0] = dev->graphics_family;
        queue_families[1] = dev->present_family;
        swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
        swapchain_info.pQueueFamilyIndices = &queue_families[0];
        swapchain_info.queueFamilyIndexCount = 2;
    }

    if (vkCreateSwapchainKHR(dev->device, &swapchain_info, dev->alloc_cb, &dev->swapchain) != VK_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    vkGetSwapchainImagesKHR(dev->device, dev->swapchain, &dev->swapchain_image_count, NULL);
    RT_ASSERT(dev->swapchain_image_count <= RT_ARRAY_COUNT(dev->swapchain_images), "Unexpectedly high number of swapchain images.");
    vkGetSwapchainImagesKHR(dev->device, dev->swapchain, &dev->swapchain_image_count, &dev->swapchain_images[0]);
    for (uint32_t i = 0; i < dev->swapchain_image_count; ++i) {
        VkImageViewCreateInfo view_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = dev->swapchain_images[i],
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = surface_format,
            .components = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                            VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY },
            .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                  .baseArrayLayer = 0, .baseMipLevel = 0,
                                  .layerCount = 1, .levelCount = 1},
        };
        vkCreateImageView(dev->device, &view_info, dev->alloc_cb, &dev->swapchain_image_views[i]);
    }

    rtReturnTemporaryArena(temp);
    return RT_SUCCESS;
}

void DestroySwapchain(rt_vk_device *dev) {
    vkDestroySwapchainKHR(dev->device, dev->swapchain, dev->alloc_cb);
    for (uint32_t i = 0; i < dev->swapchain_image_count; ++i) {
        vkDestroyImageView(dev->device, dev->swapchain_image_views[i], dev->alloc_cb);
    }
}

rt_result RecreateSwapchain(rt_vk_device *dev) {
    /* TODO(Kevin): Do this in a more performant way involving oldSwapchain */
    vkDeviceWaitIdle(dev->device);
    DestroySwapchain(dev);
    return CreateSwapchain(dev);
}
12  src/renderer/vk/utils.c  Normal file
@@ -0,0 +1,12 @@
#include "utils.h"

rt_result rtVkResultToRTResult(VkResult result) {
    switch (result) {
    case VK_SUCCESS:
        return RT_SUCCESS;
    case VK_ERROR_OUT_OF_HOST_MEMORY:
        return RT_OUT_OF_MEMORY;
    default:
        return RT_UNKNOWN_ERROR;
    }
}
9  src/renderer/vk/utils.h  Normal file
@@ -0,0 +1,9 @@
#ifndef RT_VK_UTILS_H
#define RT_VK_UTILS_H

#include <runtime/runtime.h>
#include <volk/volk.h>

rt_result rtVkResultToRTResult(VkResult result);

#endif
21  src/renderer/vk/vma_impl.cpp  Normal file
@@ -0,0 +1,21 @@
#ifdef _MSC_VER
#pragma warning(push, 0)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wmissing-braces"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#define VMA_IMPLEMENTATION
#include <vk_mem_alloc.h>
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
@@ -1,377 +0,0 @@
#include "aio.h"
#include "config.h"
#include "threading.h"

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>

void Win32ErrorToString(DWORD last_error, char *out, int bufsize);

#elif defined(__linux__)
#include <sched.h>

#endif

#include <assert.h>
#include <stdlib.h>

/* Maintain a ringbuffer of pending operations */

typedef struct {
#ifdef _WIN32
    HANDLE file_handle;
    OVERLAPPED overlapped;
#endif
    volatile rt_aio_state state;
} rt_aio;

typedef struct {
    rt_mutex *guard;

    rt_aio *storage;
    uint32_t capacity;
    uint32_t head;
    uint32_t tail;
} rt_aio_ringbuffer;

typedef struct {
    rt_aio *a;
    rt_aio *b;
    uint32_t a_count;
} rt_ringbuffer_space;

static rt_aio_ringbuffer _ringbuffer;

static rt_ringbuffer_space ReserveRingbufferSpace(uint32_t count) {
    if (!rtLockMutex(_ringbuffer.guard)) {
        rt_ringbuffer_space failed = {NULL, NULL, 0};
        return failed;
    }

    rt_ringbuffer_space result = {NULL, NULL, 0};

    if (_ringbuffer.head >= _ringbuffer.tail) {
        if (_ringbuffer.head + count <= _ringbuffer.capacity) {
            result.a_count = count;
            result.a = &_ringbuffer.storage[_ringbuffer.head];
            _ringbuffer.head = (_ringbuffer.head + count) % _ringbuffer.capacity;
        } else {
            /* Check if enough space is free at the end */
            uint32_t a_count = _ringbuffer.capacity - _ringbuffer.head;
            uint32_t b_count = count - a_count;

            if (b_count <= _ringbuffer.tail) {
                result.a_count = a_count;
                result.a = &_ringbuffer.storage[_ringbuffer.head];
                result.b = &_ringbuffer.storage[0];
                _ringbuffer.head = b_count;
            } else {
                /* Not enough space, we would overwrite the tail */
                rtLog("aio", "Ringbuffer is full.");
            }
        }
    } else {
        /* Head is lower than tail */
        uint32_t num_free = _ringbuffer.tail - _ringbuffer.head;
        if (count < num_free) {
            result.a_count = count;
            result.a = &_ringbuffer.storage[_ringbuffer.head];
            _ringbuffer.head = (_ringbuffer.head + count) % _ringbuffer.capacity;
        } else {
            /* Not enough space, we would overwrite the tail */
            rtLog("aio", "Ringbuffer is full.");
        }
    }

    rtUnlockMutex(_ringbuffer.guard);
    return result;
}

#ifdef _WIN32
static void
win32CompletionRoutine(DWORD error_code, DWORD num_bytes_transfered, LPOVERLAPPED overlapped) {
    rt_aio *op = (rt_aio *)overlapped->hEvent;
    assert(op->state == RT_AIO_STATE_PENDING);

    if (error_code != ERROR_SUCCESS) {
        op->state = RT_AIO_STATE_FAILED;
        rtLog("aio", "Async io failed: %u", error_code);
    } else {
        op->state = RT_AIO_STATE_FINISHED;
    }

    CloseHandle(op->file_handle);
}
#endif

RT_CVAR_I(rt_MaxConcurrentAsyncIO,
          "Maximum number of concurrent async. I/O operations. Default: 1024",
          1024);

rt_result InitAIO(void) {
    unsigned int max_concurrent_operations = rt_MaxConcurrentAsyncIO.i;
    _ringbuffer.guard = rtCreateMutex();
    if (!_ringbuffer.guard) {
        return RT_AIO_OUT_OF_MEMORY;
    }
    if (max_concurrent_operations == 0)
        max_concurrent_operations = 1024;

    _ringbuffer.storage = calloc(max_concurrent_operations, sizeof(rt_aio));
    if (!_ringbuffer.storage)
        return RT_AIO_OUT_OF_MEMORY;
    _ringbuffer.head = 0;
    _ringbuffer.tail = 0;
    _ringbuffer.capacity = max_concurrent_operations;
    return RT_SUCCESS;
}

void ShutdownAIO(void) {
    rtDestroyMutex(_ringbuffer.guard);
    free(_ringbuffer.storage);
    _ringbuffer.capacity = 0;
}

RT_DLLEXPORT rt_result rtSubmitLoadBatch(const rt_load_batch *batch, rt_aio_handle *handles) {
    if (batch->num_loads > RT_LOAD_BATCH_MAX_SIZE) {
        return RT_AIO_LOAD_TOO_LARGE;
    }

    rt_ringbuffer_space rbspace = ReserveRingbufferSpace(batch->num_loads);
    if (!rbspace.a) {
        rtReportError("aio", "Too many pending file operations");
        return RT_AIO_TOO_MANY_OPERATIONS;
    }

    for (unsigned int i = 0; i < batch->num_loads; ++i) {
        rt_aio *op = (i < rbspace.a_count) ? &rbspace.a[i] : &rbspace.b[i - rbspace.a_count];
        op->state = RT_AIO_STATE_PENDING;
        const char *file_path = rtGetFilePath(batch->loads[i].file);
        if (!file_path) {
            rtReportError("aio", "Failed to resolve file path for a batched load");
            op->state = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
#ifdef _WIN32
        op->overlapped = (OVERLAPPED){
            /* ReadFileEx does not use hEvent and we are free to use it for our own purposes. */
            .hEvent = (HANDLE)(op),
            .Internal = 0,
            .InternalHigh = 0,
            .Offset = (DWORD)(batch->loads[i].offset & MAXDWORD),
            .OffsetHigh = (DWORD)(batch->loads[i].offset >> 32),
        };

        WCHAR wpath[MAX_PATH];
        if (MultiByteToWideChar(CP_UTF8,
                                MB_PRECOMPOSED,
                                file_path,
                                -1,
                                wpath,
                                RT_ARRAY_COUNT(wpath)) == 0) {
            rtReportError("aio", "MultiByteToWideChar failed with error code: %u", GetLastError());
            op->state = RT_AIO_STATE_FINISHED;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }

        HANDLE file_handle = CreateFileW(wpath,
                                         GENERIC_READ,
                                         FILE_SHARE_READ,
                                         NULL,
                                         OPEN_EXISTING,
                                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
                                         NULL);
        if (file_handle == INVALID_HANDLE_VALUE) {
            DWORD err = GetLastError();
            char error_msg[256];
            Win32ErrorToString(err, error_msg, 256);
            rtReportError("aio",
                          "CreateFileW failed for file: %s with error code: %u (%s)",
                          file_path,
                          err,
                          error_msg);
            op->state = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
        op->file_handle = file_handle;
        BOOL result = ReadFileEx(file_handle,
                                 batch->loads[i].dest,
                                 (DWORD)batch->loads[i].num_bytes,
                                 &op->overlapped,
                                 win32CompletionRoutine);
        DWORD err = GetLastError();
        if (!result || err != ERROR_SUCCESS) {
            char error_msg[256];
            Win32ErrorToString(err, error_msg, 256);
            rtReportError("aio", "ReadFileEx failed with error code: %u (%s)", err, error_msg);
            op->state = RT_AIO_STATE_FINISHED;
            handles[i] = RT_AIO_INVALID_HANDLE;
            CloseHandle(file_handle);
            op->file_handle = NULL;
        }

        /* Handle is the index into the ringbuffer + 1 */
        ptrdiff_t op_idx = op - _ringbuffer.storage;
        handles[i] = (uint32_t)op_idx + 1;
#endif
    }

    return RT_SUCCESS;
}

RT_DLLEXPORT rt_result rtSubmitWriteBatch(const rt_write_batch *batch, rt_aio_handle *handles) {
    if (batch->num_writes > RT_LOAD_BATCH_MAX_SIZE) {
        return RT_AIO_WRITE_TOO_LARGE;
    }

    rt_ringbuffer_space rbspace = ReserveRingbufferSpace(batch->num_writes);
    if (!rbspace.a) {
        rtReportError("aio", "Too many pending file operations");
        return RT_AIO_TOO_MANY_OPERATIONS;
    }

    for (unsigned int i = 0; i < batch->num_writes; ++i) {
        rt_aio *op = (i < rbspace.a_count) ? &rbspace.a[i] : &rbspace.b[i - rbspace.a_count];
        op->state = RT_AIO_STATE_PENDING;
        const char *file_path = rtGetFilePath(batch->writes[i].file);
        if (!file_path) {
            rtReportError("aio", "Failed to resolve file path for a batched write");
            op->state = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
#ifdef _WIN32
        op->overlapped = (OVERLAPPED){
            /* ReadFileEx does not use hEvent and we are free to use it for our own purposes. */
            .hEvent = (HANDLE)(op),
            .Internal = 0,
            .InternalHigh = 0,
            .Offset = (DWORD)(batch->writes[i].offset & MAXDWORD),
            .OffsetHigh = (DWORD)(batch->writes[i].offset >> 32),
        };

        WCHAR wpath[MAX_PATH];
        if (MultiByteToWideChar(CP_UTF8,
                                MB_PRECOMPOSED,
                                file_path,
                                -1,
                                wpath,
                                RT_ARRAY_COUNT(wpath)) == 0) {
            rtReportError("aio", "MultiByteToWideChar failed with error code: %u", GetLastError());
            op->state = RT_AIO_STATE_FINISHED;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }

        HANDLE file_handle = CreateFileW(wpath,
                                         GENERIC_WRITE,
                                         0,
                                         NULL,
                                         OPEN_ALWAYS,
                                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
                                         NULL);
        if (file_handle == INVALID_HANDLE_VALUE) {
            DWORD err = GetLastError();
            char error_msg[256];
            Win32ErrorToString(err, error_msg, 256);
            rtReportError("aio",
                          "CreateFileW failed for file: %s with error code: %u (%s)",
                          file_path,
                          err,
                          error_msg);
            op->state = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
        op->file_handle = file_handle;
        BOOL result = WriteFileEx(file_handle,
                                  batch->writes[i].buffer,
                                  (DWORD)batch->writes[i].num_bytes,
                                  &op->overlapped,
                                  win32CompletionRoutine);
        DWORD err = GetLastError();
        if (!result || (err != ERROR_SUCCESS && err != ERROR_ALREADY_EXISTS)) {
            char error_msg[256];
            Win32ErrorToString(err, error_msg, 256);
            rtReportError("aio", "WriteFileEx failed with error code: %u (%s)", err, error_msg);
            op->state = RT_AIO_STATE_FINISHED;
            handles[i] = RT_AIO_INVALID_HANDLE;
            CloseHandle(file_handle);
            op->file_handle = NULL;
        }

        /* Handle is the index into the ringbuffer + 1 */
        ptrdiff_t op_idx = op - _ringbuffer.storage;
        handles[i] = (uint32_t)op_idx + 1;
#endif
    }

    return RT_SUCCESS;
}

RT_DLLEXPORT rt_aio_state rtGetAIOState(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity)
        return RT_AIO_STATE_INVALID;
#ifdef _WIN32
    /* Give the completion function an opportunity to run */
    SleepEx(0, TRUE);
#endif
    rtLockMutex(_ringbuffer.guard);
    rt_aio_state state = _ringbuffer.storage[handle - 1].state;
    rtUnlockMutex(_ringbuffer.guard);
    return state;
}

RT_DLLEXPORT void rtReleaseAIO(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity) {
        return;
    }
    rtLockMutex(_ringbuffer.guard);
    _ringbuffer.storage[handle - 1].state = RT_AIO_STATE_INVALID;
    if (handle - 1 == _ringbuffer.tail) {
        /* Advance the tail such that it points to the last used slot. (Or to head, if the
         * ringbuffer is now empty) */
        uint32_t i = _ringbuffer.tail;
        while ((_ringbuffer.storage[i].state == RT_AIO_STATE_INVALID) && i != _ringbuffer.head) {
            i = (i + 1) % _ringbuffer.capacity;
        }
        _ringbuffer.tail = i;
    }
    rtUnlockMutex(_ringbuffer.guard);
}

RT_DLLEXPORT rt_aio_state rtWaitForAIOCompletion(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity)
        return RT_AIO_STATE_INVALID;
    rt_aio_state state;
    do {
        state = rtGetAIOState(handle);
        /* NOTE(Kevin): This is where we could temporarily run a job. */
#ifdef _WIN32
        YieldProcessor();
#elif defined(__linux__)
        sched_yield();
#endif
    } while (state == RT_AIO_STATE_PENDING);
    return state;
}

RT_DLLEXPORT rt_result rtSubmitSingleLoad(rt_file_load load, rt_aio_handle *handle) {
    rt_load_batch batch;
    batch.loads[0] = load;
    batch.num_loads = 1;
    return rtSubmitLoadBatch(&batch, handle);
}

RT_DLLEXPORT rt_aio_state rtSubmitSingleLoadSync(rt_file_load load) {
    rt_aio_handle handle;
    if (rtSubmitSingleLoad(load, &handle) != RT_SUCCESS)
        return RT_AIO_STATE_FAILED;
    rt_aio_state state = rtWaitForAIOCompletion(handle);
    rtReleaseAIO(handle);
    return state;
}
@ -23,7 +23,8 @@
|
|||||||
_InterlockedExchange_rel((volatile long *)(_pDest), (_NewVal))
|
_InterlockedExchange_rel((volatile long *)(_pDest), (_NewVal))
|
||||||
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
|
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
|
||||||
_InterlockedCompareExchange_acq((volatile long *)(_pDest), (_NewVal), (_Compare))
|
_InterlockedCompareExchange_acq((volatile long *)(_pDest), (_NewVal), (_Compare))
|
||||||
-#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) _InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare))
+#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare))
 #else
 /* x64/86 does not have acquire/release versions of these */
 #define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
@@ -32,7 +33,8 @@
     _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
 #define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
     _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
-#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
+#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
 #endif
 #define rtAtomic32Exchange(_pDest, _NewVal) \
     _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
@@ -49,9 +51,37 @@
 #define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
 #define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)

-/* TODO Linux versions of compare exchange
-   https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
- */
+/* CAS "implementations" to make it conform to our expected api, i.e. return the original value of
+ * *dest */
+#define __RT_POSIX_CAS_N_IMPL(_T, _Name)                                                    \
+    static RT_INLINE _T _Name(volatile _T *dest, _T new_val, _T compare, int memorder) {    \
+        _T original = compare;                                                               \
+        if (!__atomic_compare_exchange_n(dest,                                               \
+                                         &compare,                                           \
+                                         new_val,                                            \
+                                         false,                                              \
+                                         memorder,                                           \
+                                         __ATOMIC_ACQUIRE))                                  \
+            original = compare; /* Overwritten on failure with the original value */         \
+        return original;                                                                      \
+    }
+
+__RT_POSIX_CAS_N_IMPL(int, __rtPOSIXCASInt)
+__RT_POSIX_CAS_N_IMPL(long, __rtPOSIXCASLong)
+__RT_POSIX_CAS_N_IMPL(uint32_t, __rtPOSIXCASUint32)
+
+#define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
+    __atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_ACQUIRE)
+#define rtAtomic32ExchangeRel(_pDest, _NewVal) \
+    __atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_RELEASE)
+#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
+    _Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_ACQUIRE)
+#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
+    _Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_RELEASE)
+#define rtAtomic32Exchange(_pDest, _NewVal) \
+    __atomic_exchange_n((_pDest), (_NewVal), __ATOMIC_SEQ_CST)
+#define rtAtomic32CAS(_pDest, _NewVal, _Compare) \
+    _Generic((_NewVal), int: __rtPOSIXCASInt, long: __rtPOSIXCASLong, uint32_t: __rtPOSIXCASUint32)((_pDest), (_NewVal), (_Compare), __ATOMIC_SEQ_CST)
+
 #endif
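A minimal usage sketch of the CAS macros added above (hypothetical caller code, not part of this change; it only assumes the macros shown in the hunk):

/* Hypothetical usage sketch: one-time initialization guarded by rtAtomic32CAS.
 * rtAtomic32CAS returns the previous value of *dest, so only the thread that
 * observes 0 performs the setup. */
static volatile uint32_t g_initialized = 0u;

static void EnsureInitialized(void) {
    if (rtAtomic32CAS(&g_initialized, 1u, 0u) == 0u) {
        /* ... one-time setup ... */
    }
}

The _Generic dispatch means the macro picks the __rtPOSIXCAS* helper that matches the static type of the new value, so int, long and uint32_t destinations all go through the same interface.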
@@ -3,7 +3,7 @@
 #include "runtime.h"
 #include "threading.h"

-#include "aio.h"
+#include "rt_aio.h"
 #include "buffer_manager.h"
 #include "file_tab.h"
 #include "mem_arena.h"
@@ -102,6 +102,9 @@ RT_DLLEXPORT void rtNotifyCVARChange(const rt_cvar *cvar) {
     case RT_CVAR_TYPE_SIZE:
         rtLog("CVAR", "Changed %s to %zu.", cvar->name, cvar->sz);
         break;
+    case RT_CVAR_TYPE_UINT:
+        rtLog("CVAR", "Changed %s to %u.", cvar->name, cvar->ui);
+        break;
     default:
         rtLog("CVAR", "Changed %s, but the cvar has an invalid type.", cvar->name);
         break;
@@ -143,38 +146,7 @@ static int Handler(void *user, const char *section, const char *name, const char
         return 1;
     }

-    int num_read = 0;
-    switch (cvar->type) {
-    case RT_CVAR_TYPE_INT:
-        num_read = sscanf(value, "%d", &cvar->i);
-        break;
-    case RT_CVAR_TYPE_FLOAT:
-        num_read = sscanf(value, "%f", &cvar->f);
-        break;
-    case RT_CVAR_TYPE_STRING: {
-        num_read = 1;
-        char *copy = rtStoreString(value);
-        if (!copy) {
-            rtReportError("CVAR",
-                          "Failed to store string value of cvar %s in config file %s.",
-                          name,
-                          file_path);
-            return 0;
-        }
-        cvar->s = copy;
-        break;
-    }
-    case RT_CVAR_TYPE_SIZE:
-        num_read = sscanf(value, "%zu", &cvar->sz);
-        break;
-    default:
-        rtReportError("CVAR", "CVar %s has an invalid type.", cvar->name);
-        return 0;
-    }
-
-    if (num_read == 1) {
-        rtNotifyCVARChange(cvar);
-    } else {
+    if (rtSetCVARFromString(cvar, value) != RT_SUCCESS) {
         rtLog("CVAR", "Failed to read value of CVar %s in config file %s.", cvar->name, file_path);
     }
@@ -251,6 +223,8 @@ RT_DLLEXPORT rt_result rtProcessConfigFiles(unsigned int count, const rt_file_id
     }

     for (unsigned int i = 0; i < count; ++i) {
+        if (aios[i] == RT_AIO_INVALID_HANDLE)
+            continue;
         rt_aio_state state = rtWaitForAIOCompletion(aios[i]);
         if (state == RT_AIO_STATE_FINISHED) {
             res = ProcessConfigFile(configs[i].buffer, configs[i].fsz, configs[i].path);
@@ -302,3 +276,42 @@ void ProcessEarlyEngineConfigs(void) {
     }
     free(buf);
 }
+
+RT_DLLEXPORT rt_result rtSetCVARFromString(rt_cvar *cvar, const char *value_str) {
+    int num_read = 0;
+    switch (cvar->type) {
+    case RT_CVAR_TYPE_INT:
+        num_read = sscanf(value_str, "%d", &cvar->i);
+        break;
+    case RT_CVAR_TYPE_FLOAT:
+        num_read = sscanf(value_str, "%f", &cvar->f);
+        break;
+    case RT_CVAR_TYPE_STRING: {
+        num_read = 1;
+        char *copy = rtStoreString(value_str);
+        if (!copy) {
+            rtReportError("CVAR", "Failed to store string value of cvar %s.", cvar->name);
+            return RT_OUT_OF_MEMORY;
+        }
+        cvar->s = copy;
+        break;
+    }
+    case RT_CVAR_TYPE_SIZE:
+        num_read = sscanf(value_str, "%zu", &cvar->sz);
+        break;
+    case RT_CVAR_TYPE_UINT:
+        num_read = sscanf(value_str, "%u", &cvar->ui);
+        break;
+    default:
+        rtReportError("CVAR", "CVar %s has an invalid type.", cvar->name);
+        return RT_INVALID_VALUE;
+    }
+
+    if (num_read == 1) {
+        rtNotifyCVARChange(cvar);
+    }
+    return (num_read) ? RT_SUCCESS : RT_INVALID_VALUE;
+}
@@ -13,6 +13,7 @@ typedef enum {
     RT_CVAR_TYPE_FLOAT,
     RT_CVAR_TYPE_STRING,
     RT_CVAR_TYPE_SIZE,
+    RT_CVAR_TYPE_UINT,
 } rt_cvar_type;

 typedef struct {
@@ -23,6 +24,7 @@ typedef struct {
         float f;
         const char *s;
         size_t sz;
+        unsigned int ui;
     };
     rt_cvar_type type;
 } rt_cvar;
@@ -38,6 +40,8 @@ typedef void(rt_cvar_change_event_handler_fn)(rt_cvar *cvar, void *userdata);
     rt_cvar n = {.name = #n, .description = d, .s = (v), .type = RT_CVAR_TYPE_STRING}
 #define RT_CVAR_SZ(n, d, v) \
     rt_cvar n = {.name = #n, .description = d, .sz = (v), .type = RT_CVAR_TYPE_SIZE}
+#define RT_CVAR_UI(n, d, v) \
+    rt_cvar n = {.name = #n, .description = d, .ui = (v), .type = RT_CVAR_TYPE_UINT}

 RT_DLLEXPORT void rtRegisterCVAR(rt_cvar *cvar);

@@ -57,6 +61,8 @@ RT_DLLEXPORT void rtNotifyCVARChange(const rt_cvar *cvar);
  * They are processed in-order, meaning later files can overwrite earlier files. */
 RT_DLLEXPORT rt_result rtProcessConfigFiles(unsigned int count, const rt_file_id *fids);

+RT_DLLEXPORT rt_result rtSetCVARFromString(rt_cvar *cvar, const char *value_str);
+
 #ifdef __cplusplus
 }
 #endif
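A short sketch of how the new unsigned cvar type and rtSetCVARFromString fit together (illustrative only; the cvar name below is made up and does not appear in the commit):

/* Illustrative only: declare an unsigned cvar and override it from a string,
 * the way a config-file handler would. The cvar name is hypothetical. */
RT_CVAR_UI(rt_ExampleWorkerCount, "Number of worker threads. Default: 4", 4);

static void ApplyOverride(const char *value) {
    if (rtSetCVARFromString(&rt_ExampleWorkerCount, value) != RT_SUCCESS)
        rtLog("CVAR", "Could not parse %s as an unsigned value.", value);
    /* On success rtSetCVARFromString already calls rtNotifyCVARChange(). */
}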
@@ -9,9 +9,20 @@
 #pragma warning Are you sure that you want to include this file ?
 #endif

+#ifndef _Maybenull_
 #define _Maybenull_
+#endif
+
+#ifndef _In_opt_count_
 #define _In_opt_count_(n)
+#endif
+
+#ifndef _In_bytecount_
 #define _In_bytecount_(n)
+#endif
+
+#ifndef _In_count_
 #define _In_count_(n)
+#endif

 #endif
@@ -258,17 +258,14 @@ RT_DLLEXPORT uint64_t rtGetFileModificationTimestamp(const char *path) {
 }

 RT_DLLEXPORT bool rtSyncReadWholeFile(const char *path, void *dest, size_t dest_size) {
+    size_t fsz = rtGetFileSize(path);
+    if (fsz > dest_size) {
+        return false;
+    }
     FILE *f = fopen(path, "rb");
     if (!f)
         return false;
-    fseek(f, SEEK_END, 0);
-    size_t fsz = (size_t)ftell(f);
-    fseek(f, SEEK_SET, 0);
-    if (fsz > dest_size) {
-        fclose(f);
-        return false;
-    }
-    size_t n = fread(dest, 1 fsz, f);
+    size_t n = fread(dest, 1, fsz, f);
     fclose(f);
     return n == fsz;
 }
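For context, a hedged usage sketch of the reworked synchronous read; the helper below is illustrative and only assumes rtGetFileSize and rtSyncReadWholeFile as shown above:

/* Sketch: read a whole text file into a caller-provided buffer and terminate it. */
static bool LoadSmallText(const char *path, char *buf, size_t buf_size) {
    size_t fsz = rtGetFileSize(path);
    if (fsz == 0 || fsz >= buf_size)
        return false; /* need one extra byte for the terminator */
    if (!rtSyncReadWholeFile(path, buf, buf_size))
        return false;
    buf[fsz] = '\0';
    return true;
}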
@@ -1,8 +1,5 @@
 #include "runtime.h"
 #include "config.h"
-#include "aio.h"
-#include "file_tab.h"
-#include "buffer_manager.h"

 extern rt_cvar rt_MaxConcurrentAsyncIO;
 extern rt_cvar rt_AssertEnabled;
@@ -6,11 +6,16 @@ lz4_dep = lz4_proj.get_variable('liblz4_dep')
 xxhash_proj = subproject('xxhash', default_options: ['default_library=static', 'b_sanitize=none'])
 xxhash_dep = xxhash_proj.get_variable('xxhash_dep')

-runtime_deps = [thread_dep, m_dep, windowing_dep, inih_dep, lz4_dep, xxhash_dep]
+runtime_deps = [thread_dep, m_dep, inih_dep, lz4_dep, xxhash_dep]
+
+if host_machine.system() == 'linux'
+    rt_dep = declare_dependency(link_args: ['-lrt'])
+    runtime_deps += rt_dep
+endif

 runtime_incdirs = contrib_incdir
 runtime_lib = library('rt',
     # Project Sources
-    'aio.h',
     'atomics.h',
     'buffer_manager.h',
     'compression.h',
@@ -25,13 +30,13 @@ runtime_lib = library('rt',
     'jobs.h',
     'mem_arena.h',
     'resources.h',
+    'rt_aio.h',
     'runtime.h',
     'string_storage.h',
     'threading.h',
     'threading_helpers.hpp',
     'timing.h',

-    'aio.c',
     'assert.c',
     'buffer_manager.c',
     'compression.c',
@@ -48,6 +53,7 @@ runtime_lib = library('rt',
     'jobs.c',
     'mem_arena.c',
     'resource_manager.c',
+    'rt_aio.c',
     'sprint.c',
     'string_storage.c',
     'text.c',
@@ -1,4 +1,4 @@
-#include "aio.h"
+#include "rt_aio.h"
 #include "buffer_manager.h"
 #include "compression.h"
 #include "config.h"
@@ -10,7 +10,7 @@
 #include "resources.h"
 #include "threading.h"

-#include "renderer/common/renderer_api.h"
+#include "renderer/renderer.h"

 #include <assert.h>
 #include <stdlib.h>
src/runtime/rt_aio.c (new file, 503 lines)
@@ -0,0 +1,503 @@
#include "rt_aio.h"
#include "config.h"
#include "runtime.h"
#include "threading.h"

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>

void Win32ErrorToString(DWORD last_error, char *out, int bufsize);

#elif defined(__linux__)
#include <sys/types.h>
#include <sys/file.h>
#include <sched.h>
#include <aio.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

#define IO_SIGNAL SIGUSR1

#endif

#include <assert.h>
#include <stdlib.h>

/* Maintain a ringbuffer of pending operations */

typedef struct {
#ifdef _WIN32
    HANDLE file_handle;
    OVERLAPPED overlapped;
#elif defined(__linux__)
    int fd;
    struct aiocb64 cb;
#endif
    volatile rt_aio_state state;
} rt_aio;

typedef struct {
    rt_mutex *guard;

    rt_aio *storage;
    uint32_t capacity;
    uint32_t head;
    uint32_t tail;
} rt_aio_ringbuffer;

typedef struct {
    rt_aio *a;
    rt_aio *b;
    uint32_t a_count;
} rt_ringbuffer_space;

static rt_aio_ringbuffer _ringbuffer;

static rt_ringbuffer_space ReserveRingbufferSpace(uint32_t count) {
    if (!rtLockMutex(_ringbuffer.guard)) {
        rt_ringbuffer_space failed = {NULL, NULL, 0};
        return failed;
    }

    rt_ringbuffer_space result = {NULL, NULL, 0};

    if (_ringbuffer.head >= _ringbuffer.tail) {
        if (_ringbuffer.head + count <= _ringbuffer.capacity) {
            result.a_count = count;
            result.a       = &_ringbuffer.storage[_ringbuffer.head];
            _ringbuffer.head = (_ringbuffer.head + count) % _ringbuffer.capacity;
        } else {
            /* Check if enough space is free at the end */
            uint32_t a_count = _ringbuffer.capacity - _ringbuffer.head;
            uint32_t b_count = count - a_count;

            if (b_count <= _ringbuffer.tail) {
                result.a_count = a_count;
                result.a       = &_ringbuffer.storage[_ringbuffer.head];
                result.b       = &_ringbuffer.storage[0];
                _ringbuffer.head = b_count;
            } else {
                /* Not enough space, we would overwrite the tail */
                rtLog("aio", "Ringbuffer is full.");
            }
        }
    } else {
        /* Head is lower than tail */
        uint32_t num_free = _ringbuffer.tail - _ringbuffer.head;
        if (count < num_free) {
            result.a_count = count;
            result.a       = &_ringbuffer.storage[_ringbuffer.head];
            _ringbuffer.head = (_ringbuffer.head + count) % _ringbuffer.capacity;
        } else {
            /* Not enough space, we would overwrite the tail */
            rtLog("aio", "Ringbuffer is full.");
        }
    }

    rtUnlockMutex(_ringbuffer.guard);
    return result;
}

#ifdef _WIN32
static void
win32CompletionRoutine(DWORD error_code, DWORD num_bytes_transfered, LPOVERLAPPED overlapped) {
    rt_aio *op = (rt_aio *)overlapped->hEvent;
    assert(op->state == RT_AIO_STATE_PENDING);

    if (error_code != ERROR_SUCCESS) {
        op->state = RT_AIO_STATE_FAILED;
        rtLog("aio", "Async io failed: %u", error_code);
    } else {
        op->state = RT_AIO_STATE_FINISHED;
    }

    CloseHandle(op->file_handle);
}

#elif defined(__linux__)
static void linuxAIOSigHandler(int sig, siginfo_t *si, void *ucontext) {
    RT_ASSERT(sig == IO_SIGNAL, "The signal handler was called for an unexpected signal.");
    if (si->si_code != SI_ASYNCIO)
        return;
    rt_aio *op = si->si_value.sival_ptr;
    RT_ASSERT(op->state == RT_AIO_STATE_PENDING, "The async io operation was in an unexpected state.");

    if (si->si_errno != 0) {
        const char *err = strerror(si->si_errno);
        rtLog("aio", "Async io failed: %u (%s)", si->si_errno, err);
        op->state = RT_AIO_STATE_FAILED;
    } else {
        op->state = RT_AIO_STATE_FINISHED;
    }

    close(op->fd);
    op->fd = -1;
}
#endif

RT_CVAR_I(rt_MaxConcurrentAsyncIO,
          "Maximum number of concurrent async. I/O operations. Default: 1024",
          1024);

rt_result InitAIO(void) {
    unsigned int max_concurrent_operations = rt_MaxConcurrentAsyncIO.i;
    _ringbuffer.guard = rtCreateMutex();
    if (!_ringbuffer.guard) {
        return RT_AIO_OUT_OF_MEMORY;
    }
    if (max_concurrent_operations == 0)
        max_concurrent_operations = 1024;

    _ringbuffer.storage = calloc(max_concurrent_operations, sizeof(rt_aio));
    if (!_ringbuffer.storage)
        return RT_AIO_OUT_OF_MEMORY;
    _ringbuffer.head     = 0;
    _ringbuffer.tail     = 0;
    _ringbuffer.capacity = max_concurrent_operations;

#ifdef __linux__
    /* Register the handler for the IO completion signal */
    struct sigaction sa;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags     = SA_RESTART | SA_SIGINFO;
    sa.sa_sigaction = linuxAIOSigHandler;
    if (sigaction(IO_SIGNAL, &sa, NULL) == -1) {
        return RT_UNKNOWN_ERROR;
    }
#endif

    return RT_SUCCESS;
}

void ShutdownAIO(void) {
    rtDestroyMutex(_ringbuffer.guard);
    free(_ringbuffer.storage);
    _ringbuffer.capacity = 0;
}

#ifdef _WIN32
static void win32LoadBatchInner(const char *file_path, const rt_file_load *load, rt_aio *op, rt_aio_handle *handle) {
    op->overlapped = (OVERLAPPED){
        /* ReadFileEx does not use hEvent and we are free to use it for our own purposes. */
        .hEvent       = (HANDLE)(op),
        .Internal     = 0,
        .InternalHigh = 0,
        .Offset       = (DWORD)(load->offset & MAXDWORD),
        .OffsetHigh   = (DWORD)(load->offset >> 32),
    };

    WCHAR wpath[MAX_PATH];
    if (MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, file_path, -1, wpath, RT_ARRAY_COUNT(wpath)) ==
        0) {
        rtReportError("aio", "MultiByteToWideChar failed with error code: %u", GetLastError());
        op->state = RT_AIO_STATE_FINISHED;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }

    HANDLE file_handle = CreateFileW(wpath,
                                     GENERIC_READ,
                                     FILE_SHARE_READ,
                                     NULL,
                                     OPEN_EXISTING,
                                     FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
                                     NULL);
    if (file_handle == INVALID_HANDLE_VALUE) {
        DWORD err = GetLastError();
        char error_msg[256];
        Win32ErrorToString(err, error_msg, 256);
        rtReportError("aio",
                      "CreateFileW failed for file: %s with error code: %u (%s)",
                      file_path,
                      err,
                      error_msg);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }
    op->file_handle = file_handle;
    BOOL result = ReadFileEx(file_handle,
                             load->dest,
                             (DWORD)load->num_bytes,
                             &op->overlapped,
                             win32CompletionRoutine);
    DWORD err = GetLastError();
    if (!result || err != ERROR_SUCCESS) {
        char error_msg[256];
        Win32ErrorToString(err, error_msg, 256);
        rtReportError("aio", "ReadFileEx failed with error code: %u (%s)", err, error_msg);
        op->state = RT_AIO_STATE_FINISHED;
        *handle   = RT_AIO_INVALID_HANDLE;
        CloseHandle(file_handle);
        op->file_handle = NULL;
    }

    /* Handle is the index into the ringbuffer + 1 */
    ptrdiff_t op_idx = op - _ringbuffer.storage;
    *handle          = (uint32_t)op_idx + 1;
}
#endif

#ifdef __linux__
static void linuxLoadBatchInner(const char *file_path, const rt_file_load *load, rt_aio *op, rt_aio_handle *handle) {
    memset(&op->cb, 0, sizeof(op->cb));
    int fd = open(file_path, O_RDONLY | O_LARGEFILE);
    if (fd == -1) {
        const char *err = strerror(errno);
        rtReportError("aio", "open failed for file: %s with error: %d (%s)", file_path, errno, err);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }
    op->fd            = fd;
    op->cb.aio_fildes = fd;
    op->cb.aio_offset = load->offset;
    op->cb.aio_buf    = load->dest;
    op->cb.aio_nbytes = load->num_bytes;
    op->cb.aio_reqprio = 0;
    op->cb.aio_sigevent.sigev_notify          = SIGEV_SIGNAL;
    op->cb.aio_sigevent.sigev_signo           = IO_SIGNAL;
    op->cb.aio_sigevent.sigev_value.sival_ptr = op;

    if (aio_read64(&op->cb) == -1) {
        const char *err = strerror(errno);
        rtReportError("aio", "aio_read64 failed for file: %s with error: %d (%s)", file_path, errno, err);
        close(fd);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }

    ptrdiff_t op_idx = op - _ringbuffer.storage;
    *handle          = (uint32_t)op_idx + 1;
}
#endif

RT_DLLEXPORT rt_result rtSubmitLoadBatch(const rt_load_batch *batch, rt_aio_handle *handles) {
    if (batch->num_loads > RT_LOAD_BATCH_MAX_SIZE) {
        return RT_AIO_LOAD_TOO_LARGE;
    }

    rt_ringbuffer_space rbspace = ReserveRingbufferSpace(batch->num_loads);
    if (!rbspace.a) {
        rtReportError("aio", "Too many pending file operations");
        return RT_AIO_TOO_MANY_OPERATIONS;
    }

    for (unsigned int i = 0; i < batch->num_loads; ++i) {
        rt_aio *op = (i < rbspace.a_count) ? &rbspace.a[i] : &rbspace.b[i - rbspace.a_count];
        op->state  = RT_AIO_STATE_PENDING;
        const char *file_path = rtGetFilePath(batch->loads[i].file);
        if (!file_path) {
            rtReportError("aio", "Failed to resolve file path for a batched load");
            op->state  = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
#ifdef _WIN32
        win32LoadBatchInner(file_path, &batch->loads[i], op, &handles[i]);
#elif defined(__linux__)
        linuxLoadBatchInner(file_path, &batch->loads[i], op, &handles[i]);
#endif
    }

    return RT_SUCCESS;
}

#ifdef _WIN32
static void win32WriteBatchInner(const char *file_path, const rt_file_write *write, rt_aio *op, rt_aio_handle *handle) {
    op->overlapped = (OVERLAPPED){
        /* ReadFileEx does not use hEvent and we are free to use it for our own purposes. */
        .hEvent       = (HANDLE)(op),
        .Internal     = 0,
        .InternalHigh = 0,
        .Offset       = (DWORD)(write->offset & MAXDWORD),
        .OffsetHigh   = (DWORD)(write->offset >> 32),
    };

    WCHAR wpath[MAX_PATH];
    if (MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, file_path, -1, wpath, RT_ARRAY_COUNT(wpath)) ==
        0) {
        rtReportError("aio", "MultiByteToWideChar failed with error code: %u", GetLastError());
        op->state = RT_AIO_STATE_FINISHED;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }

    HANDLE file_handle = CreateFileW(wpath,
                                     GENERIC_WRITE,
                                     0,
                                     NULL,
                                     OPEN_ALWAYS,
                                     FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
                                     NULL);
    if (file_handle == INVALID_HANDLE_VALUE) {
        DWORD err = GetLastError();
        char error_msg[256];
        Win32ErrorToString(err, error_msg, 256);
        rtReportError("aio",
                      "CreateFileW failed for file: %s with error code: %u (%s)",
                      file_path,
                      err,
                      error_msg);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }
    op->file_handle = file_handle;
    BOOL result = WriteFileEx(file_handle,
                              write->buffer,
                              (DWORD)write->num_bytes,
                              &op->overlapped,
                              win32CompletionRoutine);
    DWORD err = GetLastError();
    if (!result || (err != ERROR_SUCCESS && err != ERROR_ALREADY_EXISTS)) {
        char error_msg[256];
        Win32ErrorToString(err, error_msg, 256);
        rtReportError("aio", "WriteFileEx failed with error code: %u (%s)", err, error_msg);
        op->state = RT_AIO_STATE_FINISHED;
        *handle   = RT_AIO_INVALID_HANDLE;
        CloseHandle(file_handle);
        op->file_handle = NULL;
    }

    /* Handle is the index into the ringbuffer + 1 */
    ptrdiff_t op_idx = op - _ringbuffer.storage;
    *handle          = (uint32_t)op_idx + 1;
}
#endif

#ifdef __linux__
static void linuxWriteBatchInner(const char *file_path, const rt_file_write *write, rt_aio *op, rt_aio_handle *handle) {
    memset(&op->cb, 0, sizeof(op->cb));
    int fd = open(file_path, O_WRONLY | O_CREAT | O_LARGEFILE, 0644); /* O_CREAT requires a mode */
    if (fd == -1) {
        const char *err = strerror(errno);
        rtReportError("aio", "open failed for file: %s with error: %d (%s)", file_path, errno, err);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }
    op->fd            = fd;
    op->cb.aio_fildes = fd;
    op->cb.aio_offset = write->offset;
    op->cb.aio_buf    = (volatile void *)write->buffer;
    op->cb.aio_nbytes = write->num_bytes;
    op->cb.aio_reqprio = 0;
    op->cb.aio_sigevent.sigev_notify          = SIGEV_SIGNAL;
    op->cb.aio_sigevent.sigev_signo           = IO_SIGNAL;
    op->cb.aio_sigevent.sigev_value.sival_ptr = op;

    if (aio_write64(&op->cb) == -1) {
        const char *err = strerror(errno);
        rtReportError("aio", "aio_write64 failed for file: %s with error: %d (%s)", file_path, errno, err);
        close(fd);
        op->state = RT_AIO_STATE_INVALID;
        *handle   = RT_AIO_INVALID_HANDLE;
        return;
    }

    ptrdiff_t op_idx = op - _ringbuffer.storage;
    *handle          = (uint32_t)op_idx + 1;
}
#endif

RT_DLLEXPORT rt_result rtSubmitWriteBatch(const rt_write_batch *batch, rt_aio_handle *handles) {
    if (batch->num_writes > RT_LOAD_BATCH_MAX_SIZE) {
        return RT_AIO_WRITE_TOO_LARGE;
    }

    rt_ringbuffer_space rbspace = ReserveRingbufferSpace(batch->num_writes);
    if (!rbspace.a) {
        rtReportError("aio", "Too many pending file operations");
        return RT_AIO_TOO_MANY_OPERATIONS;
    }

    for (unsigned int i = 0; i < batch->num_writes; ++i) {
        rt_aio *op = (i < rbspace.a_count) ? &rbspace.a[i] : &rbspace.b[i - rbspace.a_count];
        op->state  = RT_AIO_STATE_PENDING;
        const char *file_path = rtGetFilePath(batch->writes[i].file);
        if (!file_path) {
            rtReportError("aio", "Failed to resolve file path for a batched write");
            op->state  = RT_AIO_STATE_INVALID;
            handles[i] = RT_AIO_INVALID_HANDLE;
            continue;
        }
#ifdef _WIN32
        win32WriteBatchInner(file_path, &batch->writes[i], op, &handles[i]);
#elif defined(__linux__)
        linuxWriteBatchInner(file_path, &batch->writes[i], op, &handles[i]);
#endif
    }

    return RT_SUCCESS;
}

RT_DLLEXPORT rt_aio_state rtGetAIOState(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity)
        return RT_AIO_STATE_INVALID;
#ifdef _WIN32
    /* Give the completion function an opportunity to run */
    SleepEx(0, TRUE);
#endif
    rtLockMutex(_ringbuffer.guard);
    rt_aio_state state = _ringbuffer.storage[handle - 1].state;
    rtUnlockMutex(_ringbuffer.guard);
    return state;
}

RT_DLLEXPORT void rtReleaseAIO(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity) {
        return;
    }
    rtLockMutex(_ringbuffer.guard);
    _ringbuffer.storage[handle - 1].state = RT_AIO_STATE_INVALID;
    if (handle - 1 == _ringbuffer.tail) {
        /* Advance the tail such that it points to the last used slot. (Or to head, if the
         * ringbuffer is now empty) */
        uint32_t i = _ringbuffer.tail;
        while ((_ringbuffer.storage[i].state == RT_AIO_STATE_INVALID) && i != _ringbuffer.head) {
            i = (i + 1) % _ringbuffer.capacity;
        }
        _ringbuffer.tail = i;
    }
    rtUnlockMutex(_ringbuffer.guard);
}

RT_DLLEXPORT rt_aio_state rtWaitForAIOCompletion(rt_aio_handle handle) {
    if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity)
        return RT_AIO_STATE_INVALID;
    rt_aio_state state;
    do {
        state = rtGetAIOState(handle);
        /* NOTE(Kevin): This is where we could temporarily run a job. */
#ifdef _WIN32
        YieldProcessor();
#elif defined(__linux__)
        sched_yield();
#endif
    } while (state == RT_AIO_STATE_PENDING);
    return state;
}

RT_DLLEXPORT rt_result rtSubmitSingleLoad(rt_file_load load, rt_aio_handle *handle) {
    rt_load_batch batch;
    batch.loads[0] = load;
    batch.num_loads = 1;
    return rtSubmitLoadBatch(&batch, handle);
}

RT_DLLEXPORT rt_aio_state rtSubmitSingleLoadSync(rt_file_load load) {
    rt_aio_handle handle;
    if (rtSubmitSingleLoad(load, &handle) != RT_SUCCESS)
        return RT_AIO_STATE_FAILED;
    rt_aio_state state = rtWaitForAIOCompletion(handle);
    rtReleaseAIO(handle);
    return state;
}
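The ring buffer hands out operation slots, a handle is simply the slot index plus one, and ReserveRingbufferSpace() returns up to two segments when a reservation wraps past the end of the storage array (for example, with capacity 8, head 6 and tail 2, reserving 4 slots yields two slots starting at index 6 and two starting at index 0). A typical call sequence for the new API might look like the following sketch; it is illustrative only, uses just the types and functions shown above, and assumes the rt_file_id comes from the runtime's file table:

/* Sketch: kick off one asynchronous read and block until it completes. */
static bool ReadRegisteredFile(rt_file_id fid, void *dest, size_t size) {
    rt_file_load load = {
        .file      = fid,
        .dest      = dest,
        .num_bytes = size,
        .offset    = 0,
    };
    /* rtSubmitSingleLoadSync() submits the load, waits for completion and
     * releases the handle again. */
    return rtSubmitSingleLoadSync(load) == RT_AIO_STATE_FINISHED;
}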
@@ -75,6 +75,7 @@ enum {
     RT_SUCCESS = 0,
     RT_OUT_OF_MEMORY = 1,
     RT_INVALID_VALUE = 2,
+    RT_NOT_SUPPORTED = 3,

     RT_CUSTOM_ERROR_START,

@@ -86,6 +87,10 @@ typedef struct {
     unsigned int length;
 } rt_text_span;

+typedef uint32_t rt_bool32;
+#define RT_TRUE 1u
+#define RT_FALSE 0u
+
 /* snprintf replacement.
  * Always returns a zero terminated string.
  */
@@ -1,4 +1,5 @@
 #include "timing.h"
+#include "runtime.h"

 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
@@ -27,6 +28,44 @@ RT_DLLEXPORT rt_timestamp rtTimeNow(void) {
     return (rt_timestamp){.ticks = qpc.QuadPart, .ticks_per_second = _QPC_freq};
 }

+#elif defined(__linux__)
+
+#include <time.h>
+
+static uint64_t _gettime_freq = 0u;
+static clockid_t _clock_id = CLOCK_REALTIME;
+
+rt_result InitTiming(void) {
+    struct timespec res;
+    _clock_id = CLOCK_PROCESS_CPUTIME_ID;
+    if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res) != 0) {
+        /* Fallback to CLOCK_REALTIME */
+        _clock_id = CLOCK_REALTIME;
+        if (clock_getres(CLOCK_REALTIME, &res) != 0) {
+            return RT_UNKNOWN_ERROR;
+        }
+    }
+
+    if (res.tv_sec > 0) {
+        rtReportError("TIMING", "Clock reports resolution greater than 1 second.");
+        return RT_INVALID_VALUE;
+    }
+    _gettime_freq = 1000000000;
+    double us_res = (double)res.tv_nsec / 1e3;
+
+    rtLog("TIMING", "clock_gettime resolution: %.3lf us.", us_res);
+    return RT_SUCCESS;
+}
+
+RT_DLLEXPORT rt_timestamp rtTimeNow(void) {
+    struct timespec ts;
+    clock_gettime(_clock_id, &ts);
+    /* Combine seconds and nanoseconds so the tick count does not wrap every second. */
+    uint64_t ticks = (uint64_t)ts.tv_sec * 1000000000u + (uint64_t)ts.tv_nsec;
+    return (rt_timestamp){.ticks = ticks, .ticks_per_second = _gettime_freq};
+}
+
+#endif
+
 RT_DLLEXPORT rt_time_delta rtTimeBetween(rt_timestamp a, rt_timestamp b) {
     double a_secs, b_secs;
     a_secs = (double)a.ticks / (double)a.ticks_per_second;
@@ -34,4 +73,3 @@ RT_DLLEXPORT rt_time_delta rtTimeBetween(rt_timestamp a, rt_timestamp b) {
     return b_secs - a_secs;
 }

-#endif
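A small usage sketch of the timing functions touched here (assuming rt_time_delta is the double-precision second count that the subtraction in rtTimeBetween suggests):

/* Sketch: measure how long a piece of work takes. */
static void TimeSomething(void) {
    rt_timestamp start = rtTimeNow();
    /* ... work ... */
    rt_timestamp end = rtTimeNow();
    rt_time_delta seconds = rtTimeBetween(start, end);
    rtLog("TIMING", "Work took %.3f ms.", seconds * 1000.0);
}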
subprojects/vulkan-headers.wrap (new file, 13 lines)
@@ -0,0 +1,13 @@
[wrap-file]
directory = Vulkan-Headers-1.3.283
source_url = https://github.com/KhronosGroup/Vulkan-Headers/archive/v1.3.283.tar.gz
source_filename = vulkan-headers-1.3.283.tar.gz
source_hash = a76ff77815012c76abc9811215c2167128a73a697bcc23948e858d1f7dd54a85
patch_filename = vulkan-headers_1.3.283-1_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/vulkan-headers_1.3.283-1/get_patch
patch_hash = 00e30d35117ae90a19b5b8878746fceaf31b41778b817ca9e6b3ae6063be8233
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/vulkan-headers_1.3.283-1/vulkan-headers-1.3.283.tar.gz
wrapdb_version = 1.3.283-1

[provide]
vulkanheaders = vulkan_headers_dep
subprojects/vulkan-memory-allocator.wrap (new file, 13 lines)
@@ -0,0 +1,13 @@
[wrap-file]
directory = VulkanMemoryAllocator-3.1.0
source_url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/archive/refs/tags/v3.1.0.tar.gz
source_filename = VulkanMemoryAllocator-3.1.0.tar.gz
source_hash = ae134ecc37c55634f108e926f85d5d887b670360e77cd107affaf3a9539595f2
patch_filename = vulkan-memory-allocator_3.1.0-1_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/vulkan-memory-allocator_3.1.0-1/get_patch
patch_hash = d62a983856146f4529c5c5a46b13c0451a4f7e02d0966606dcd054a204c5fd80
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/vulkan-memory-allocator_3.1.0-1/VulkanMemoryAllocator-3.1.0.tar.gz
wrapdb_version = 3.1.0-1

[provide]
vulkan-memory-allocator = vma_allocator_dep
@@ -16,7 +16,7 @@ khronos_validation.validate_sync = true
 khronos_validation.thread_safety = true

 # Specifies what action is to be taken when a layer reports information
-khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG
+#khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG

 # Comma-delineated list of options specifying the types of messages to be reported
 khronos_validation.report_flags = debug,error,perf,info,warn