Rip out renderer code

THIS WILL NOT COMPILE
This commit is contained in:
Kevin Trogant 2024-06-04 11:45:55 +02:00
parent 6b830f3ff2
commit b0e6839a1c
56 changed files with 0 additions and 8227 deletions

View File

@ -1,10 +0,0 @@
#define RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
#include "builtin_objects.h"
rt_builtin_render_object_types g_builtin_render_object_types;
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void) {
g_builtin_render_object_types.render_mesh =
rtRegisterRenderObjectType(sizeof(rt_render_mesh), "render_mesh");
return RT_SUCCESS;
}

View File

@ -1,40 +0,0 @@
#ifndef RT_GFX_BUILTIN_OBJECTS_H
#define RT_GFX_BUILTIN_OBJECTS_H
/* Render Object types used by the builtin graphics passes.
*
* As a user you are free not to use these, but then you
* also cannot use the builtin render passes. */
#include "renderer_api.h"
#include "render_list.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
rt_pipeline_handle pipeline;
rt_buffer_handle vbo;
rt_buffer_handle ibo;
uint32_t vertex_count;
uint32_t index_count;
} rt_render_mesh;
typedef struct {
rt_render_object_type render_mesh;
} rt_builtin_render_object_types;
#ifndef RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
extern RT_DLLIMPORT rt_builtin_render_object_types g_builtin_render_object_types;
#endif
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void);
#ifdef __cplusplus
}
#endif
#endif
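/* Illustrative sketch (not part of the original file): how a caller might combine the
 * builtin render_mesh type with the render list API from render_list.h. The frame_id
 * and the mesh contents are placeholders. */
#if 0
rt_result ExampleSubmitBuiltinMesh(unsigned int frame_id) {
    rt_result res = rtRegisterBuiltinRenderObjectTypes();
    if (res != RT_SUCCESS)
        return res;
    rt_create_render_list_result list_res =
        rtCreateRenderList(g_builtin_render_object_types.render_mesh, frame_id);
    if (!list_res.ok)
        return RT_OUT_OF_MEMORY;
    rt_render_mesh mesh = {0}; /* pipeline, vbo, ibo would be filled in by the caller */
    if (!rtPushRenderListEntry(&list_res.list, &mesh))
        return RT_UNKNOWN_ERROR;
    return RT_SUCCESS;
}
#endif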

View File

@ -1,260 +0,0 @@
#include "effect.h"
#include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/handles.h"
#include "runtime/hashing.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include "runtime/atomics.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_SZ(rt_EffectCacheSize, "The number of slots in the effect cache. Default: 1024", 1024);
typedef struct {
rt_resource_id resource;
rt_effect effect;
_Alignas(4) unsigned int refcount;
} rt_effect_cache_slot;
/* We use a hashtable to find previously loaded effects.
* To reclaim unreferenced slots when we need to, we use a minheap.
* The minheap implements an LRU list. To track usage, we use a global running "usage counter",
* incremented whenever an effect is loaded.
*/
typedef struct {
rt_effect_cache_slot *slots;
rt_hashtable lut;
rt_minheap reclaim_heap;
/* Linearly allocate slots until we reach capacity */
size_t next_free;
/* Used to track "time" since an effect was loaded */
_Alignas(4) int usage_counter;
void *memory;
rt_rwlock lock;
} rt_effect_cache;
static rt_effect_cache _cache;
rt_result InitEffectCache(void) {
if (!RT_IS_POWER_OF_TWO(rt_EffectCacheSize.sz)) {
rtReportError(
"GFX",
"The value of \"rt_EffectCacheSize\" must be a power of two.\nConfigured: %zu.",
rt_EffectCacheSize.sz);
return RT_INVALID_VALUE;
}
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_cache.lock = lock_res.lock;
size_t mem_required = sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz +
RT_HASH_TABLE_MEMORY_REQUIRED(
2 * rt_EffectCacheSize.sz) + /* double to keep performance up */
sizeof(int) * rt_EffectCacheSize.sz + /* heap keys */
sizeof(size_t) * rt_EffectCacheSize.sz; /* heap values */
_cache.memory = malloc(mem_required);
if (!_cache.memory) {
rtDestroyRWLock(&_cache.lock);
return RT_OUT_OF_MEMORY;
}
_cache.lut = rtCreateHashtable(rt_EffectCacheSize.sz, _cache.memory, NULL, NULL);
int *keys =
(int *)((char *)_cache.memory + RT_HASH_TABLE_MEMORY_REQUIRED(2 * rt_EffectCacheSize.sz));
size_t *values = (size_t *)(keys + rt_EffectCacheSize.sz);
_cache.reclaim_heap = rtCreateMinheap(keys, values, sizeof(size_t), rt_EffectCacheSize.sz, 0);
_cache.usage_counter = 0;
_cache.slots = (rt_effect_cache_slot *)(values + rt_EffectCacheSize.sz);
memset(_cache.slots, 0, sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz);
return RT_SUCCESS;
}
void ShutdownEffectCache(void) {
free(_cache.memory);
}
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
uint32_t id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
uint32_t id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}
static void ReleaseEffect(rt_effect *effect) {
for (unsigned int i = 0; i < effect->pass_count; ++i) {
g_renderer.DestroyPipeline(effect->passes[i].pipeline);
}
}
/* Returns the index of the reserved slot */
static size_t ReserveSlot(rt_resource_id id) {
if (_cache.next_free < rt_EffectCacheSize.sz) {
size_t slot = _cache.next_free++;
RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
_cache.slots[slot].refcount = 1;
if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
_cache.slots[slot].refcount = 0;
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
return SIZE_MAX;
}
_cache.slots[slot].resource = id;
return slot;
} else if (!rtMinheapIsEmpty(&_cache.reclaim_heap)) {
size_t slot;
rtMinheapPop(&_cache.reclaim_heap, &slot);
RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
_cache.slots[slot].refcount = 1;
rt_resource_id old_id = _cache.slots[slot].resource;
RT_ASSERT(old_id != RT_INVALID_RESOURCE_ID, "The slot should contain an old effect.");
ReleaseEffect(&_cache.slots[slot].effect);
rtHashtableRemove(&_cache.lut, old_id);
if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
_cache.slots[slot].refcount = 0;
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
return SIZE_MAX;
}
_cache.slots[slot].resource = id;
return slot;
} else {
rtLog("GFX",
"Could not insert effect %x into the cache, because the effect cache is full.",
id);
return SIZE_MAX;
}
}
/* Load resource to memory allocated on the given arena */
static rt_result LoadResource(rt_resource_id id, void **p_out, rt_arena *arena) {
size_t size = rtGetResourceSize(id);
if (!size) {
rtLog("GFX", "ID %x is not a valid resource.", id);
return RT_INVALID_VALUE;
}
void *dst = rtArenaPush(arena, size);
if (!dst) {
rtLog("GFX", "Failed to allocate %zu bytes of temporary storage.", size);
return RT_OUT_OF_MEMORY;
}
*p_out = dst;
return rtGetResource(id, dst);
}
static rt_result LoadEffect(rt_resource_id id, rt_effect *effect) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena) {
rtLog("GFX", "Could not get a temporary arena.");
return RT_OUT_OF_MEMORY;
}
const rt_resource *resource = NULL;
rt_result res = LoadResource(id, (void **)&resource, temp.arena);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
if (resource->type != RT_RESOURCE_EFFECT) {
rtReturnTemporaryArena(temp);
rtLog("GFX", "Resource %x does not refer to an effect resource.", id);
return RT_INVALID_VALUE;
}
const rt_effect_info *effect_info = resource->data;
effect->pass_count = effect_info->pass_count;
for (unsigned int i = 0; i < effect_info->pass_count; ++i) {
rt_resource *pipeline_resource = NULL;
res = LoadResource(effect_info->passes[i].pipeline, (void **)&pipeline_resource, temp.arena);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
if (pipeline_resource->type != RT_RESOURCE_PIPELINE) {
rtReturnTemporaryArena(temp);
rtLog("GFX", "Resource %x does not refer to a pipeline resource.", id);
return RT_INVALID_VALUE;
}
rt_pipeline_info *pipeline_info = pipeline_resource->data;
rt_pipeline_handle pipeline = g_renderer.CompilePipeline(pipeline_info);
if (!RT_IS_HANDLE_VALID(pipeline)) {
rtReturnTemporaryArena(temp);
rtLog("GFX",
"Failed to compile the pipeline of pass %d (%x).",
i,
effect_info->passes[i].pass_id);
return RT_UNKNOWN_ERROR;
}
effect->passes[i].pass_id = effect_info->passes[i].pass_id;
effect->passes[i].pipeline = pipeline;
}
rtReturnTemporaryArena(temp);
return RT_SUCCESS;
}
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect) {
rtAtomic32Inc(&_cache.usage_counter);
/* Check if the effect is already loaded */
rtLockRead(&_cache.lock);
uint64_t slot = rtHashtableLookup(&_cache.lut, id, UINT64_MAX);
if (slot != UINT64_MAX) {
RT_ASSERT(_cache.slots[slot].resource == id, "Got the wrong effect");
rtAtomic32Inc(&_cache.slots[slot].refcount);
*effect = &_cache.slots[slot].effect;
rtUnlockRead(&_cache.lock);
return RT_SUCCESS;
}
rtUnlockRead(&_cache.lock);
/* Load the effect */
rtLockWrite(&_cache.lock);
if (rtHashtableLookup(&_cache.lut, id, UINT64_MAX) != UINT64_MAX) {
/* Another thread was faster than us; just retry */
rtUnlockWrite(&_cache.lock);
return rtLoadEffect(id, effect);
}
slot = ReserveSlot(id);
if (slot == SIZE_MAX) {
rtUnlockWrite(&_cache.lock);
return RT_OUT_OF_MEMORY;
}
rt_result res = LoadEffect(id, &_cache.slots[slot].effect);
rtUnlockWrite(&_cache.lock);
*effect = &_cache.slots[slot].effect;
return res;
}
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect) {
RT_VERIFY(effect);
rt_effect_cache_slot *slot = (rt_effect_cache_slot *)((char *)effect - offsetof(rt_effect_cache_slot, effect));
if (rtAtomic32Dec(&slot->refcount) == 0) {
rtLockWrite(&_cache.lock);
size_t slot_index = (size_t)(slot - _cache.slots);
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot_index);
rtUnlockWrite(&_cache.lock);
}
}
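/* Illustrative sketch (not part of the original file): the intended call pattern for the
 * effect cache above. The resource id comes from the caller. */
#if 0
static rt_result ExampleUseEffect(rt_resource_id effect_resource) {
    const rt_effect *effect = NULL;
    rt_result res = rtLoadEffect(effect_resource, &effect);
    if (res != RT_SUCCESS)
        return res;
    for (uint32_t i = 0; i < effect->pass_count; ++i) {
        /* Bind effect->passes[i].pipeline for the render pass identified by
         * effect->passes[i].pass_id, e.g. via g_renderer.CmdBindPipeline(). */
    }
    rtReleaseEffect(effect); /* drops the refcount; the slot becomes reclaimable at 0 */
    return RT_SUCCESS;
}
#endif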

View File

@ -1,60 +0,0 @@
#ifndef RT_GFX_EFFECT_H
#define RT_GFX_EFFECT_H
/* An effect lists the passes during which an object needs to be rendered
* and a pipeline for each pass.
* The effect also defines the required vertex layout per pass.
*/
#include "gfx.h"
#include "renderer_api.h"
#include "runtime/resources.h"
/* *** Resource types *** */
typedef struct rt_pipeline_info_s {
rt_resource_id vertex_shader;
rt_resource_id fragment_shader;
rt_resource_id compute_shader;
/* TODO(Kevin): Fixed function settings */
} rt_pipeline_info;
typedef struct {
/* Id of the render pass during which this effect pass is run. */
uint32_t pass_id;
rt_resource_id pipeline;
} rt_effect_pass_info;
typedef struct {
uint32_t pass_count;
rt_effect_pass_info passes[RT_MAX_SUBRESOURCES];
} rt_effect_info;
/* *** Runtime types *** */
typedef struct {
uint32_t pass_id;
rt_pipeline_handle pipeline;
} rt_effect_pass;
typedef struct {
uint32_t pass_count;
rt_effect_pass passes[RT_MAX_SUBRESOURCES];
} rt_effect;
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len);
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len);
/* Load an effect from a resource file.
* Returns:
* - RT_SUCCESS
* - RT_OUT_OF_MEMORY, if temporary memory allocations failed
* - RT_INVALID_VALUE, if id does not refer to an effect resource.
* - RT_UNKNOWN_ERROR, if a pipeline failed to compile
* - errors returned by rtGetResource() */
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect);
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect);
#endif
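/* Illustrative sketch (not part of the original file): pass ids are 32-bit name hashes,
 * so a pass declared in an effect can be matched against a framegraph pass by name.
 * "forward" is a placeholder pass name. */
#if 0
uint32_t forward_pass_id = rtCalculateRenderPassID("forward", sizeof("forward") - 1);
/* forward_pass_id is never 0 and can be compared against rt_effect_pass_info.pass_id. */
#endif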

View File

@ -1,88 +0,0 @@
#ifndef RT_GFX_H
#define RT_GFX_H
/* Graphics system. This is the interface of the rendering code.
 *
 * We need (at least) three different renderers:
* - world cell renderer (for world & dungeon environments)
* - character renderer (for animated models)
* - object renderer (for static models)
*/
#include <stdint.h>
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4201) /* anonymous struct */
#endif
typedef union {
float v[4];
struct {
float r;
float g;
float b;
float a;
};
} rt_color;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
/* NOTE(kevin): When you add a value here, you need to handle them in
* framegraph_processor.c : ParseFramegraph
* and in the render target and texture functions of all renderers. */
typedef enum {
RT_PIXEL_FORMAT_INVALID,
RT_PIXEL_FORMAT_R8G8B8A8_UNORM,
RT_PIXEL_FORMAT_B8G8R8A8_UNORM,
RT_PIXEL_FORMAT_R8G8B8A8_SRGB,
RT_PIXEL_FORMAT_B8G8R8A8_SRGB,
RT_PIXEL_FORMAT_R8G8B8_UNORM,
RT_PIXEL_FORMAT_B8G8R8_UNORM,
RT_PIXEL_FORMAT_R8G8B8_SRGB,
RT_PIXEL_FORMAT_B8G8R8_SRGB,
RT_PIXEL_FORMAT_DEPTH24_STENCIL8,
RT_PIXEL_FORMAT_DEPTH32,
/* Special value indicating whichever format the swapchain uses */
RT_PIXEL_FORMAT_SWAPCHAIN,
RT_PIXEL_FORMAT_count,
} rt_pixel_format;
RT_INLINE int rtIsDepthFormat(rt_pixel_format format) {
return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 || format == RT_PIXEL_FORMAT_DEPTH32;
}
/* Defined in renderer_api.h; the full definition is not needed for most gfx usage. */
typedef struct rt_renderer_init_info_s rt_renderer_init_info;
RT_DLLEXPORT void rtRegisterRendererCVars(void);
RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info);
RT_DLLEXPORT void rtShutdownGFX(void);
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id);
#ifdef __cplusplus
}
#endif
#endif
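/* Illustrative sketch (not part of the original file): the expected lifecycle of the
 * graphics system as exposed by this header. The init info and the loop condition are
 * placeholders. */
#if 0
void ExampleGfxLifecycle(rt_renderer_init_info *init_info) {
    rtRegisterRendererCVars(); /* before the config system loads cvar values */
    if (rtInitGFX(init_info) != RT_SUCCESS)
        return;
    int running = 1; /* placeholder main-loop condition */
    for (unsigned int frame_id = 1; running; ++frame_id) {
        rtBeginGFXFrame(frame_id);
        /* ... build render views and submit them to the render graph ... */
        rtEndGFXFrame(frame_id);
    }
    rtShutdownGFX();
}
#endif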

View File

@ -1,224 +0,0 @@
#include <stdbool.h>
#include <string.h>
#define RT_DONT_DEFINE_RENDERER_GLOBAL
#include "gfx.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/dynamic_libs.h"
/* Attributes are used to bind buffers (or textures) to symbolic values.
* For example, an attribute might be bound to "CELL_GRID", which would be
* replaced with the (at the time of the invoke) grid buffer of the current
* world cell.
*/
rt_renderer_api g_renderer;
#ifndef RT_STATIC_LIB
static rt_dynlib _renderer_lib;
#endif
static bool _renderer_loaded = false;
RT_DLLEXPORT
RT_CVAR_S(rt_Renderer,
"Select the render backend. Available options: [vk, dx11, null], Default: vk",
"dx11");
extern rt_cvar rt_RenderViewArenaSize;
extern rt_cvar rt_RenderListPoolSize;
#ifdef RT_STATIC_LIB
extern void RT_RENDERER_API_FN(RegisterCVars)(void);
extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
extern void RT_RENDERER_API_FN(Shutdown)(void);
extern unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void);
extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
extern void RT_RENDERER_API_FN(EndFrame)(unsigned int);
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
const rt_alloc_command_buffer_info *,
rt_command_buffer_handle *);
extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
const rt_submit_command_buffers_info *);
extern rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t,
const rt_gpu_semaphore_info *,
rt_gpu_semaphore_handle *);
extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *);
extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void);
extern rt_result
RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *, unsigned int);
extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id);
extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph, unsigned int frame_id);
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
const rt_cmd_begin_pass_info *);
extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle,
rt_render_target_handle,
rt_render_target_state);
extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle,
rt_render_target_handle);
extern void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle, rt_pipeline_handle);
extern void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle,
uint32_t,
uint32_t,
const rt_buffer_handle *,
const uint32_t *,
const uint32_t *);
extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint32_t);
#endif
extern rt_result InitRenderLists(void);
extern void ShutdownRenderLists(void);
extern void ResetRenderLists(unsigned int frame_id);
extern rt_result InitRenderViews(void);
extern void ShutdownRenderViews(void);
extern void ResetRenderViews(unsigned int frame_id);
extern rt_result InitEffectCache(void);
extern void ShutdownEffectCache(void);
static bool LoadRenderer(void) {
#if !defined(RT_STATIC_LIB)
#define RETRIEVE_SYMBOL(name, type) \
g_renderer.name = (type *)rtGetSymbol(_renderer_lib, "rtRen" #name); \
if (!g_renderer.name) { \
rtReportError("GFX", \
"Unable to retrieve renderer function %s from backend %s", \
#name, \
rt_Renderer.s); \
}
if (strcmp(rt_Renderer.s, "vk") == 0) {
_renderer_lib = rtOpenLib(RT_DLLNAME("rtvk"));
if (!_renderer_lib) {
rtReportError("GFX", "Unable to load renderer backend: %s", RT_DLLNAME("rtvk"));
return false;
}
RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
RETRIEVE_SYMBOL(GetMaxFramesInFlight, rt_get_max_frames_in_flight_fn);
RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn);
RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn);
RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn);
RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
RETRIEVE_SYMBOL(CreateRenderGraphBuilder, rt_create_render_graph_builder_fn);
RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
RETRIEVE_SYMBOL(ExecuteRenderGraph, rt_execute_render_graph_fn);
RETRIEVE_SYMBOL(SubmitRenderView, rt_submit_render_view_fn);
RETRIEVE_SYMBOL(ResetRenderGraph, rt_reset_render_graph_fn);
RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn);
RETRIEVE_SYMBOL(CmdBindPipeline, rt_cmd_bind_pipeline_fn);
RETRIEVE_SYMBOL(CmdBindVertexBuffers, rt_cmd_bind_vertex_buffers_fn);
RETRIEVE_SYMBOL(CmdDraw, rt_cmd_draw_fn);
} else {
rtReportError("GFX",
"Unsupported renderer backend: (%s) %s",
rt_Renderer.name,
rt_Renderer.s);
return false;
}
#undef RETRIEVE_SYMBOL
#else
g_renderer.RegisterCVars = &rtRenRegisterCVars;
g_renderer.Init = &rtRenInit;
g_renderer.Shutdown = &rtRenShutdown;
g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight;
g_renderer.BeginFrame = &rtRenBeginFrame;
g_renderer.EndFrame = &rtRenEndFrame;
g_renderer.CompilePipeline = &rtRenCompilePipeline;
g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers;
g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers;
g_renderer.CreateBuffers = &rtRenCreateBuffers;
g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
g_renderer.ExecuteRenderGraph = &rtRenExecuteRenderGraph;
g_renderer.SubmitRenderView = &rtRenSubmitRenderView;
g_renderer.ResetRenderGraph = &rtRenResetRenderGraph;
g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
g_renderer.CmdEndPass = &rtRenCmdEndPass;
g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite;
g_renderer.CmdBindPipeline = &rtRenCmdBindPipeline;
g_renderer.CmdBindVertexBuffers = &rtRenCmdBindVertexBuffers;
g_renderer.CmdDraw = &rtRenCmdDraw;
#endif
return true;
}
RT_DLLEXPORT void rtRegisterRendererCVars(void) {
if (!_renderer_loaded) {
if (!LoadRenderer())
return;
_renderer_loaded = true;
}
g_renderer.RegisterCVars();
}
RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
rtRegisterCVAR(&rt_Renderer);
rtRegisterCVAR(&rt_RenderViewArenaSize);
rtRegisterCVAR(&rt_RenderListPoolSize);
if (!_renderer_loaded) {
if (!LoadRenderer())
return RT_UNKNOWN_ERROR;
g_renderer.RegisterCVars();
_renderer_loaded = true;
}
rt_result result;
if ((result = g_renderer.Init(renderer_info)) != RT_SUCCESS)
return result;
if ((result = InitRenderLists()) != RT_SUCCESS)
return result;
if ((result = InitRenderViews()) != RT_SUCCESS)
return result;
if ((result = InitEffectCache()) != RT_SUCCESS)
return result;
return result;
}
RT_DLLEXPORT void rtShutdownGFX(void) {
ShutdownEffectCache();
ShutdownRenderViews();
ShutdownRenderLists();
g_renderer.Shutdown();
}
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
g_renderer.BeginFrame(frame_id);
}
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) {
g_renderer.EndFrame(frame_id);
ResetRenderLists(frame_id);
ResetRenderViews(frame_id);
}

View File

@ -1,25 +0,0 @@
gfx_deps = [thread_dep, m_dep]
gfx_lib = library('rtgfx',
# Project Sources
'builtin_objects.h',
'effect.h',
'gfx.h',
'renderer_api.h',
'render_list.h',
'render_view.h',
'builtin_objects.c',
'effect.c',
'gfx_main.c',
'render_list.c',
'render_view.c',
# Contrib Sources
dependencies : gfx_deps,
include_directories : engine_incdir,
link_with : runtime_lib,
c_pch : 'pch/gfx_pch.h',
install : true)
engine_libs += gfx_lib
engine_lib_paths += gfx_lib.full_path()

View File

@ -1,11 +0,0 @@
/* Stdlib */
#include <stdint.h>
/* Project */
#include "gfx.h"
/* Commonly used runtime headers */
#include "runtime/runtime.h"
#include "runtime/threading.h"
#include "runtime/mem_arena.h"
#include "runtime/config.h"

View File

@ -1,207 +0,0 @@
#include "render_list.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <string.h>
RT_CVAR_I(rt_RenderListPoolSize,
"Size of the pool allocated for render lists in bytes. Default: 8 MiB",
RT_MB(8));
typedef struct {
size_t size;
const char *name;
} rt_render_object_type_data;
typedef struct rt_list_pool_s {
size_t capacity;
struct rt_list_pool_s *next;
} rt_list_pool;
typedef struct {
rt_mutex *lock;
rt_list_pool *first_free;
rt_arena arena;
unsigned int access_frame_id;
} rt_frame_lists;
#define DEFAULT_LIST_CAPACITY RT_KB(1)
static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1];
static unsigned int _type_count = 0;
static rt_rwlock _type_lock;
static rt_frame_lists _frame_lists[4];
static unsigned int _max_frames_in_flight;
rt_result InitRenderLists(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_type_lock = lock_res.lock;
_max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frame_lists),
"Invalid maxium number of in-flight frames.");
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
if (!arena_res.ok) {
rtDestroyRWLock(&_type_lock);
return RT_OUT_OF_MEMORY;
}
_frame_lists[i].arena = arena_res.arena;
_frame_lists[i].lock = rtCreateMutex();
if (!_frame_lists[i].lock) {
rtReleaseArena(&_frame_lists[i].arena);
rtDestroyRWLock(&_type_lock);
return RT_UNKNOWN_ERROR;
}
_frame_lists[i].first_free = NULL;
_frame_lists[i].access_frame_id = 0;
}
return RT_SUCCESS;
}
void ShutdownRenderLists(void) {
rtDestroyRWLock(&_type_lock);
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtDestroyMutex(_frame_lists[i].lock);
rtReleaseArena(&_frame_lists[i].arena);
}
}
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
const char *debug_name) {
if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) {
rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE);
return RT_INVALID_RENDER_OBJECT_TYPE;
}
rtLockWrite(&_type_lock);
rt_render_object_type type = (rt_render_object_type)++_type_count;
_types[_type_count].size = object_size;
_types[_type_count].name = debug_name;
if (debug_name)
rtLog("GFX",
"Registered render object type %s; object size: %zu. Type: %u",
debug_name,
object_size,
_type_count);
else
rtLog("GFX",
"Registered unnamed render object type; object size: %zu. Type: %u",
object_size,
_type_count);
rtUnlockWrite(&_type_lock);
return type;
}
RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) {
size_t size = 0;
rtLockRead(&_type_lock);
if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
size = _types[type].size;
rtUnlockRead(&_type_lock);
return size;
}
RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type) {
const char *name = NULL;
rtLockRead(&_type_lock);
if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
name = _types[type].name;
rtUnlockRead(&_type_lock);
return name;
}
static rt_create_render_list_result
CreateNewList(rt_render_object_type type, unsigned int frame_id, size_t capacity) {
rt_create_render_list_result res = {.ok = false};
unsigned int slot = frame_id % _max_frames_in_flight;
rtLockMutex(_frame_lists[slot].lock);
_frame_lists[slot].access_frame_id = frame_id;
if (!_frame_lists[slot].first_free ||
_frame_lists[slot].first_free->capacity < capacity) { /* Allocate a new list */
rt_list_pool *pool = rtArenaPush(&_frame_lists[slot].arena,
sizeof(rt_list_pool) + sizeof(unsigned int) + capacity);
if (!pool) {
rtReportError("GFX",
"Out of render list pool space! Configured space: %d kiB",
rt_RenderListPoolSize.i / 1024);
goto out;
}
pool->capacity = capacity;
pool->next = _frame_lists[slot].first_free;
_frame_lists[slot].first_free = pool;
}
rt_render_list list;
unsigned int *frame_id_store =
(unsigned int *)((char *)_frame_lists[slot].first_free + sizeof(rt_list_pool));
*frame_id_store = frame_id;
list.data = (char *)_frame_lists[slot].first_free + sizeof(rt_list_pool) + sizeof(unsigned int);
list.type = type;
list.length = 0;
res.ok = true;
res.list = list;
_frame_lists[slot].first_free = _frame_lists[slot].first_free->next;
out:
rtUnlockMutex(_frame_lists[slot].lock);
return res;
}
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type,
unsigned int frame_id) {
return CreateNewList(type, frame_id, DEFAULT_LIST_CAPACITY);
}
void ResetRenderLists(unsigned int frame_id) {
unsigned int slot = frame_id % _max_frames_in_flight;
RT_ASSERT(_frame_lists[slot].access_frame_id == frame_id ||
_frame_lists[slot].access_frame_id == 0,
"Frame id mismatch");
rtLockMutex(_frame_lists[slot].lock);
_frame_lists[slot].first_free = NULL;
_frame_lists[slot].access_frame_id = 0;
rtArenaClear(&_frame_lists[slot].arena);
rtUnlockMutex(_frame_lists[slot].lock);
}
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) {
size_t object_size = rtGetRenderObjectSize(list->type);
rt_list_pool *pool =
(rt_list_pool *)((char *)list->data - sizeof(rt_list_pool) - sizeof(unsigned int));
unsigned int frame_id = *(unsigned int *)((char *)list->data - sizeof(unsigned int));
size_t list_capacity = pool->capacity / object_size;
if (list->length == list_capacity) {
/* "Grow" the list */
rt_create_render_list_result list_res =
CreateNewList(list->type, frame_id, pool->capacity * 2);
if (!list_res.ok)
return false;
memcpy(list_res.list.data, list->data, list->length * object_size);
unsigned int slot = frame_id % _max_frames_in_flight;
rtLockMutex(_frame_lists[slot].lock);
pool->next = _frame_lists[slot].first_free;
_frame_lists[slot].first_free = pool;
rtUnlockMutex(_frame_lists[slot].lock);
list_res.list.length = list->length;
*list = list_res.list;
}
char *dst = (char *)list->data + list->length * object_size;
memcpy(dst, object, object_size);
++list->length;
return true;
}

View File

@ -1,70 +0,0 @@
#ifndef RT_RENDER_LIST_H
#define RT_RENDER_LIST_H
/* a render list collects render objects. */
#include <stdint.h>
#include <stdbool.h>
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Identifies a type of render objects. */
typedef uint32_t rt_render_object_type;
typedef uint32_t rt_render_object_type_mask;
enum {
RT_INVALID_RENDER_OBJECT_TYPE = 0,
RT_MAX_RENDER_OBJECT_TYPE = 32,
};
#define RT_RENDER_OBJECT_TYPE_BIT(type) (1u << ((type)-1))
/* Registers a new render object type.
* debug_name is optional and may be NULL.
*/
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
const char *debug_name);
RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type);
RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type);
typedef struct {
rt_render_object_type type;
size_t length;
void *data;
} rt_render_list;
/* Returns a pointer to the i-th render list element.
* Works for every valid type, because the size is determined at runtime. */
RT_INLINE void *rtGetRenderListElement(const rt_render_list *list, size_t index) {
size_t size = rtGetRenderObjectSize(list->type);
return (char *)list->data + size * index;
}
/* Returns the i-th render list element, cast to type T.
* Saves a rtGetRenderObjectSize call, if the type is known beforehand. */
#define RT_GET_RENDER_LIST_ELEMENT(list, T, index) *(((T *)(list).data) + (index))
typedef struct {
bool ok;
rt_render_list list;
} rt_create_render_list_result;
/* Create a render list for a particular object type.
*
* Render Lists have a lifetime of one frame. */
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type, unsigned int frame_id);
/* Append a render object to a list. The object must be of the correct type. */
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object);
#ifdef __cplusplus
}
#endif
#endif
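/* Illustrative sketch (not part of the original file): registering a custom object type
 * once at startup and filling a per-frame list. my_debug_line is a hypothetical type. */
#if 0
typedef struct {
    float from[3];
    float to[3];
} my_debug_line;

void ExampleFillList(rt_render_object_type line_type, unsigned int frame_id) {
    /* line_type was obtained once via
     * rtRegisterRenderObjectType(sizeof(my_debug_line), "debug_line"); */
    rt_create_render_list_result res = rtCreateRenderList(line_type, frame_id);
    if (!res.ok)
        return;
    my_debug_line line = {{0.f, 0.f, 0.f}, {1.f, 1.f, 1.f}};
    rtPushRenderListEntry(&res.list, &line);
    /* Typed access without an extra rtGetRenderObjectSize() call: */
    my_debug_line first = RT_GET_RENDER_LIST_ELEMENT(res.list, my_debug_line, 0);
    (void)first;
}
#endif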

View File

@ -1,122 +0,0 @@
#include "render_view.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
RT_CVAR_I(rt_RenderViewArenaSize,
"Size of the memory arena used for allocating render views. Default: 1 MB",
RT_MB(1));
typedef struct {
rt_arena arena;
rt_mutex *lock;
uint32_t frame_id;
} rt_frame_views;
static rt_frame_views _frames[4];
static unsigned int _max_frames_in_flight;
rt_result InitRenderViews(void) {
_max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frames),
"Invalid maximum number of in-flight frames.");
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i);
if (!arena_res.ok)
return RT_OUT_OF_MEMORY;
_frames[i].arena = arena_res.arena;
_frames[i].lock = rtCreateMutex();
if (!_frames[i].lock) {
rtReleaseArena(&_frames[i].arena);
return RT_UNKNOWN_ERROR;
}
_frames[i].frame_id = 0;
}
return RT_SUCCESS;
}
void ShutdownRenderViews(void) {
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtDestroyMutex(_frames[i].lock);
rtReleaseArena(&_frames[i].arena);
}
}
void ResetRenderViews(unsigned int frame_id) {
unsigned int slot = frame_id % _max_frames_in_flight;
rtArenaClear(&_frames[slot].arena);
}
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count,
unsigned int frame_id) {
#ifdef RT_DEBUG
for (uint32_t i = 0; i < type_count - 1; ++i) {
for (uint32_t j = i + 1; j < type_count; ++j) {
RT_ASSERT(types[i] != types[j], "Duplicate render list type detected.");
}
}
#endif
unsigned int slot = frame_id % _max_frames_in_flight;
size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_object_type));
rtLockMutex(_frames[slot].lock);
void *storage = rtArenaPush(&_frames[slot].arena, size);
_frames[slot].frame_id = frame_id;
rtUnlockMutex(_frames[slot].lock);
if (!storage) {
return (rt_create_render_view_result){
.ok = false,
};
}
rt_render_view view;
view.lists = storage;
view.list_types = (rt_render_object_type *)(view.lists + type_count);
view.list_count = type_count;
view.type_mask = 0;
for (uint32_t i = 0; i < type_count; ++i) {
rt_create_render_list_result list_res = rtCreateRenderList(types[i], frame_id);
if (!list_res.ok) {
return (rt_create_render_view_result){
.ok = false,
};
}
view.lists[i] = list_res.list;
view.list_types[i] = types[i];
view.type_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
}
return (rt_create_render_view_result){.ok = true, .view = view};
}
RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object) {
for (uint32_t i = 0; i < view->list_count; ++i) {
if (view->list_types[i] == type)
return rtPushRenderListEntry(&view->lists[i], object);
}
return false;
}
RT_DLLEXPORT void
rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id, unsigned int frame_id) {
g_renderer.SubmitRenderView(render_graph, pass_id, view, frame_id);
}
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count) {
if (view.list_count != type_count)
return 0;
for (uint32_t i = 0; i < type_count; ++i) {
if (view.list_types[i] != types[i])
return 0;
}
return 1;
}

View File

@ -1,63 +0,0 @@
#ifndef RT_GFX_RENDER_VIEW_H
#define RT_GFX_RENDER_VIEW_H
/* A render view acts as a container of one or more render lists.
* Each view is processed by exactly one pass. */
#include "render_list.h"
typedef struct rt_render_graph_s rt_render_graph;
typedef struct {
rt_render_list *lists;
rt_render_object_type *list_types;
uint32_t list_count;
rt_render_object_type_mask type_mask;
} rt_render_view;
typedef struct {
bool ok;
rt_render_view view;
} rt_create_render_view_result;
#ifdef __cplusplus
extern "C" {
#endif
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count,
unsigned int frame_id);
RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object);
RT_DLLEXPORT void rtSubmitRenderView(rt_render_view view,
rt_render_graph *render_graph,
uint32_t pass_id,
unsigned int frame_id);
/* Checks if the view contains exactly the given types in the given order */
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count);
/* Checks if the view contains exactly the given types, in any order */
RT_INLINE static int
rtDoViewTypesMatch(rt_render_view view, const rt_render_object_type *types, uint32_t type_count) {
rt_render_object_type_mask in_mask = 0;
for (uint32_t i = 0; i < type_count; ++i) {
in_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
}
return view.type_mask == in_mask;
}
RT_INLINE static int rtDoesViewContainTypes(rt_render_view view,
rt_render_object_type_mask type_mask) {
return (int)(view.type_mask & type_mask);
}
#ifdef __cplusplus
}
#endif
#endif
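/* Illustrative sketch (not part of the original file): building a view for a single pass
 * and handing it to a render graph. The graph, pass id and object are provided by the
 * caller. */
#if 0
void ExampleSubmitView(rt_render_graph *graph,
                       uint32_t pass_id,
                       unsigned int frame_id,
                       rt_render_object_type mesh_type,
                       const void *mesh_object) {
    rt_create_render_view_result res = rtCreateRenderView(&mesh_type, 1, frame_id);
    if (!res.ok)
        return;
    rtPushRenderObjectToView(&res.view, mesh_type, mesh_object);
    rtSubmitRenderView(res.view, graph, pass_id, frame_id);
}
#endif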

View File

@ -1,367 +0,0 @@
#ifndef RT_GFX_BACKEND_H
#define RT_GFX_BACKEND_H
/* Backend functions and types. */
#include <stddef.h>
#include "gfx.h"
#include "render_list.h"
#include "render_view.h"
#include "runtime/resources.h"
#include "runtime/rt_math.h"
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Handles for backend objects */
#define RT_RENDER_BACKEND_HANDLE_MAX_INDEX ((1u << 24) - 1)
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255
#define RT_RENDER_BACKEND_HANDLE(name) \
typedef struct { \
uint32_t version : 8; \
uint32_t index : 24; \
} name
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);
#undef RT_RENDER_BACKEND_HANDLE
#define RT_COMPARE_RENDER_HANDLES(_A, _B, _Comp) ((*(uint32_t *)&(_A)) _Comp (*(uint32_t *)&(_B)))
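/* Illustrative note (not part of the original file): every handle packs an 8 bit version
 * and a 24 bit index into 32 bits, so two handles can be compared through the macro above: */
#if 0
rt_buffer_handle a = {.version = 1, .index = 42};
rt_buffer_handle b = {.version = 1, .index = 42};
int same = RT_COMPARE_RENDER_HANDLES(a, b, ==); /* non-zero: same index and version */
#endif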
/* Init data for the renderer */
#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif
struct rt_renderer_init_info_s {
#ifdef _WIN32
struct HINSTANCE__ *hInstance;
struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
struct _XDisplay *display;
unsigned long window;
#endif
};
/* Argument types for render commands */
typedef enum {
RT_GRAPHICS_QUEUE,
RT_COMPUTE_QUEUE,
RT_TRANSFER_QUEUE,
} rt_gpu_queue;
#if 0
/* Attributes are used to bind buffers (or textures) to symbolic values.
* For example, an attribute might be bound to "CELL_GRID", which would be
* replaced with the (at the time of the invoke) grid buffer of the current
* world cell.
*/
typedef enum {
RT_ATTRIBUTE_VALUE_UNDEFINED,
RT_ATTRIBUTE_VALUE_MATERIAL_ALBEDO,
RT_ATTRIBUTE_VALUE_MATERIAL_NORMAL,
RT_ATTRIBUTE_VALUE_count
} rt_attribute_value;
typedef struct {
uint32_t index;
rt_attribute_value value;
} rt_attribute_binding;
#endif
typedef enum {
RT_SHADER_TYPE_INVALID,
RT_SHADER_TYPE_VULKAN,
RT_SHADER_TYPE_DX11,
RT_SHADER_TYPE_count,
} rt_shader_type;
typedef enum {
RT_SHADER_STAGE_VERTEX,
RT_SHADER_STAGE_FRAGMENT,
RT_SHADER_STAGE_COMPUTE,
RT_SHADER_STAGE_count,
} rt_shader_stage;
typedef struct {
rt_shader_type type;
rt_shader_stage stage;
rt_relptr bytecode;
size_t bytecode_length;
} rt_shader_info;
typedef struct {
rt_gpu_queue target_queue;
} rt_alloc_command_buffer_info;
typedef struct {
const rt_command_buffer_handle *command_buffers;
const rt_gpu_semaphore_handle *wait_semaphores;
const uint64_t *wait_values;
const rt_gpu_semaphore_handle *signal_semaphores;
const uint64_t *signal_values;
uint32_t command_buffer_count;
uint32_t wait_semaphore_count;
uint32_t signal_semaphore_count;
} rt_submit_command_buffers_info;
typedef struct {
/* Optional, for debug purposes */
const char *name;
uint64_t initial_value;
} rt_gpu_semaphore_info;
typedef enum {
RT_BUFFER_TYPE_VERTEX,
RT_BUFFER_TYPE_INDEX,
RT_BUFFER_TYPE_UNIFORM,
RT_BUFFER_TYPE_STORAGE,
RT_BUFFER_TYPE_count
} rt_buffer_type;
typedef enum {
/* Create once, never change the data. */
RT_BUFFER_USAGE_STATIC,
/* Update occasionally (after a number of frames) */
RT_BUFFER_USAGE_DYNAMIC,
/* Create, use once and then discard */
RT_BUFFER_USAGE_TRANSIENT,
RT_BUFFER_USAGE_count,
} rt_buffer_usage;
typedef struct {
size_t size;
rt_buffer_type type;
rt_buffer_usage usage;
const void *data;
} rt_buffer_info;
typedef enum {
RT_PASS_LOAD_MODE_LOAD,
RT_PASS_LOAD_MODE_CLEAR,
} rt_pass_load_mode;
typedef enum {
RT_PASS_WRITE_MODE_STORE,
RT_PASS_WRITE_MODE_DISCARD,
} rt_pass_write_mode;
typedef union {
rt_color color;
struct {
float depth;
int32_t stencil;
} depth_stencil;
} rt_pass_clear_value;
typedef struct {
float depth;
int32_t stencil;
} rt_depth_stencil_value;
typedef struct {
rt_render_target_handle color_buffers[4];
rt_pass_load_mode color_buffer_loads[4];
rt_pass_write_mode color_buffer_writes[4];
rt_pass_clear_value color_buffer_clear_values[4];
uint32_t color_buffer_count;
rt_render_target_handle depth_stencil_buffer;
rt_pass_load_mode depth_stencil_buffer_load;
rt_pass_write_mode depth_stencil_buffer_write;
rt_pass_clear_value depth_stencil_buffer_clear_value;
rt_rect2i render_area;
// For debug purposes, can be NULL
const char *name;
} rt_cmd_begin_pass_info;
typedef enum {
/* Unusable, must be transitioned to a usable state first. */
RT_RENDER_TARGET_STATE_INVALID,
/* Used as a color- or depth-buffer */
RT_RENDER_TARGET_STATE_ATTACHMENT,
RT_RENDER_TARGET_STATE_SAMPLED_IMAGE,
RT_RENDER_TARGET_STATE_STORAGE_IMAGE,
} rt_render_target_state;
#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0
/* Renderer API */
typedef struct rt_pipeline_info_s rt_pipeline_info;
typedef struct {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
} rt_attachment_info;
enum {
/* Bit 0 contains the type: 0 -> graphics, 1 -> compute */
RT_PASS_FLAG_GRAPHICS = 0x0000,
RT_PASS_FLAG_COMPUTE = 0x0001,
RT_PASS_FLAG_TYPE_MASK = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_GRAPHICS,
/* Always execute the pass, even if no objects will be rendered. */
RT_PASS_FLAG_EXECUTE_ALWAYS = 0x0002,
};
typedef struct {
const char *name;
uint32_t flags;
} rt_pass_info;
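/* Illustrative sketch (not part of the original file): bit 0 of the flags selects the
 * pass type, the remaining bits are modifiers. "ssao" is a placeholder pass name. */
#if 0
rt_pass_info example_pass = {.name = "ssao",
                             .flags = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_EXECUTE_ALWAYS};
int is_compute = (example_pass.flags & RT_PASS_FLAG_TYPE_MASK) == RT_PASS_FLAG_COMPUTE;
#endif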
typedef struct rt_render_graph_s rt_render_graph;
typedef rt_result rt_execute_render_pass_fn(uint32_t pass_id,
rt_command_buffer_handle cmdbuf,
const rt_render_view *views,
unsigned int view_count,
void *userdata);
typedef struct {
void *obj;
void (*AddRenderTarget)(void *obj, const rt_attachment_info *info);
void (*SetBackbuffer)(void *obj, const char *rt_name);
void (*AddRenderPass)(void *obj, const rt_pass_info *info);
void (*AddColorOutput)(void *obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_color clear_color);
void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
void (*SetDepthStencilAttachment)(void *obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_depth_stencil_value clear_value);
void (*SetRenderArea)(void *obj, const char *pass_name, rt_rect2 area, float min_depth, float max_depth);
void (*BindRenderPass)(void *obj,
const char *pass_name,
rt_execute_render_pass_fn *execute_fn,
void *userdata);
rt_result (*Build)(void *obj, rt_render_graph **p_render_graph);
} rt_render_graph_builder;
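/* Illustrative sketch (not part of the original file): how a caller might drive the builder
 * interface above. Pass and render target names are placeholders, error handling is omitted,
 * and using RT_RENDER_TARGET_SIZE_SWAPCHAIN for width/height is an assumption. */
#if 0
rt_render_graph *ExampleBuildGraph(rt_render_graph_builder b,
                                   rt_execute_render_pass_fn *execute_forward,
                                   void *userdata) {
    rt_attachment_info backbuffer = {.name = "backbuffer",
                                     .format = RT_PIXEL_FORMAT_SWAPCHAIN,
                                     .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
                                     .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
                                     .samples = 1,
                                     .layers = 1};
    b.AddRenderTarget(b.obj, &backbuffer);
    b.SetBackbuffer(b.obj, "backbuffer");

    rt_pass_info forward = {.name = "forward", .flags = RT_PASS_FLAG_GRAPHICS};
    b.AddRenderPass(b.obj, &forward);
    b.AddColorOutput(b.obj,
                     "forward",
                     "backbuffer",
                     RT_PASS_LOAD_MODE_CLEAR,
                     RT_PASS_WRITE_MODE_STORE,
                     (rt_color){.r = 0.f, .g = 0.f, .b = 0.f, .a = 1.f});
    b.BindRenderPass(b.obj, "forward", execute_forward, userdata);

    rt_render_graph *graph = NULL;
    b.Build(b.obj, &graph);
    return graph;
}
#endif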
typedef void rt_register_renderer_cvars_fn(void);
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
typedef void rt_shutdown_renderer_fn(void);
typedef unsigned int rt_get_max_frames_in_flight_fn(void);
typedef void rt_begin_frame_fn(unsigned int frame_id);
typedef void rt_end_frame_fn(unsigned int frame_id);
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers);
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info);
typedef rt_result
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph, unsigned int frame_id);
typedef void
rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view, unsigned int frame_id);
typedef void rt_reset_render_graph_fn(rt_render_graph *graph, unsigned int frame_id);
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
const rt_cmd_begin_pass_info *info);
typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf,
rt_render_target_handle render_target,
rt_render_target_state new_state);
typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf,
rt_render_target_handle render_target);
typedef void rt_cmd_bind_pipeline_fn(rt_command_buffer_handle cmd, rt_pipeline_handle pipeline);
typedef void rt_cmd_bind_vertex_buffers_fn(rt_command_buffer_handle cmd,
uint32_t first_binding,
uint32_t count,
const rt_buffer_handle *buffers,
const uint32_t *strides,
const uint32_t *offsets);
typedef void
rt_cmd_draw_fn(rt_command_buffer_handle cmdbuf, uint32_t first_vertex, uint32_t vertex_count);
typedef struct {
rt_register_renderer_cvars_fn *RegisterCVars;
rt_init_renderer_fn *Init;
rt_shutdown_renderer_fn *Shutdown;
rt_get_max_frames_in_flight_fn *GetMaxFramesInFlight;
rt_begin_frame_fn *BeginFrame;
rt_end_frame_fn *EndFrame;
rt_compile_pipeline_fn *CompilePipeline;
rt_destroy_pipeline_fn *DestroyPipeline;
rt_alloc_command_buffers_fn *AllocCommandBuffers;
rt_submit_command_buffers_fn *SubmitCommandBuffers;
rt_create_buffers_fn *CreateBuffers;
rt_destroy_buffers_fn *DestroyBuffers;
/*render graph functions*/
rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
rt_execute_render_graph_fn *ExecuteRenderGraph;
rt_submit_render_view_fn *SubmitRenderView;
rt_reset_render_graph_fn *ResetRenderGraph;
/* Command Buffer Functions */
rt_cmd_begin_pass_fn *CmdBeginPass;
rt_cmd_end_pass_fn *CmdEndPass;
rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget;
rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite;
rt_cmd_bind_pipeline_fn *CmdBindPipeline;
rt_cmd_bind_vertex_buffers_fn *CmdBindVertexBuffers;
rt_cmd_draw_fn *CmdDraw;
} rt_renderer_api;
#define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name
#ifndef RT_DONT_DEFINE_RENDERER_GLOBAL
extern rt_renderer_api g_renderer;
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,9 +1,3 @@
subdir('runtime')
subdir('asset_compiler')
subdir('gfx')
subdir('app_framework')
# Renderer libs
subdir('renderer/vk')
subdir('renderer/null')
subdir('renderer/dx11')

View File

@ -1,877 +0,0 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "gfx/effect.h"
#include "gfx/renderer_api.h"
#include "runtime/buffer_manager.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "common_render_graph.h"
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
#define MAX_SAMPLED_INPUTS_PER_PASS 8
typedef struct rt_render_target_build_info {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
uint32_t first_usage;
uint32_t last_usage;
} rt_render_target_build_info;
typedef struct rt_pass_build_info {
const char *name;
uint32_t flags;
void *userdata;
rt_execute_render_pass_fn *Execute;
rt_rect2 render_area;
float min_depth;
float max_depth;
uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_color color_attachment_clear_values[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_pass_load_mode color_attachment_loads[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_pass_write_mode color_attachment_writes[MAX_COLOR_ATTACHMENTS_PER_PASS];
uint32_t color_attachment_count;
uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
uint32_t sampled_input_count;
uint32_t depth_stencil_attachment;
rt_depth_stencil_value depth_stencil_clear_value;
rt_pass_load_mode depth_stencil_load;
rt_pass_write_mode depth_stencil_write;
uint32_t *dependencies;
uint32_t dependency_count;
} rt_pass_build_info;
typedef struct {
uint32_t signaled_by;
uint32_t waited_on_by;
} rt_sync_point_build_info;
typedef struct rt_render_graph_builder_obj {
rt_arena arena;
rt_render_target_build_info *render_targets;
uint32_t render_target_count;
uint32_t render_target_capacity;
rt_pass_build_info *passes;
uint32_t pass_count;
uint32_t pass_capacity;
rt_physical_render_target_info *phys_render_targets;
uint32_t phys_render_target_count;
rt_sync_point_build_info *sync_points;
uint32_t sync_point_count;
uint32_t backbuffer;
rt_render_graph_builder_platform_callbacks platform_cbs;
} rt_render_graph_builder_obj;
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
rt_render_graph_builder_obj *obj = _obj;
if (obj->render_target_count == obj->render_target_capacity) {
uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
rt_render_target_build_info *tmp =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_render_target_build_info, new_cap);
if (obj->render_target_capacity)
memcpy(tmp,
obj->render_targets,
sizeof(rt_render_target_build_info) * obj->render_target_capacity);
obj->render_targets = tmp;
obj->render_target_capacity = new_cap;
}
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
strcpy(name, info->name);
obj->render_targets[obj->render_target_count].name = name;
obj->render_targets[obj->render_target_count].format = info->format;
obj->render_targets[obj->render_target_count].width = info->width;
obj->render_targets[obj->render_target_count].height = info->height;
obj->render_targets[obj->render_target_count].samples = info->samples;
obj->render_targets[obj->render_target_count].layers = info->layers;
obj->render_targets[obj->render_target_count].first_usage = 0;
obj->render_targets[obj->render_target_count].last_usage = 0;
++obj->render_target_count;
}
static void SetBackbuffer(void *_obj, const char *rt_name) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
obj->backbuffer = i;
return;
}
}
rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
}
static void AddRenderPass(void *_obj, const rt_pass_info *info) {
rt_render_graph_builder_obj *obj = _obj;
if (obj->pass_count == obj->pass_capacity) {
uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
rt_pass_build_info *tmp =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_pass_build_info, new_cap);
if (obj->pass_capacity)
memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
obj->passes = tmp;
obj->pass_capacity = new_cap;
}
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
strcpy(name, info->name);
obj->passes[obj->pass_count].name = name;
obj->passes[obj->pass_count].flags = info->flags;
obj->passes[obj->pass_count].color_attachment_count = 0;
obj->passes[obj->pass_count].sampled_input_count = 0;
obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
obj->passes[obj->pass_count].dependencies = NULL;
obj->passes[obj->pass_count].dependency_count = 0;
++obj->pass_count;
}
static void AddColorOutput(void *_obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_color clear_color) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as color output to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
rtLog("ren", "Too many color attachments in pass %s", pass_name);
return;
}
obj->passes[i].color_attachment_clear_values[obj->passes[i].color_attachment_count] =
clear_color;
obj->passes[i].color_attachment_loads[obj->passes[i].color_attachment_count] = load;
obj->passes[i].color_attachment_writes[obj->passes[i].color_attachment_count] = write;
obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
return;
}
}
rtLog("ren",
"Tried to add render target %s as color output to unknown render target %s",
rt_name,
pass_name);
}
static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as color output to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
rtLog("ren", "Too many sampled inputs in pass %s", pass_name);
return;
}
obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
return;
}
}
rtLog("ren",
"Tried to add render target %s as sampled input to unknown render target %s",
rt_name,
pass_name);
}
static void SetDepthStencilAttachment(void *_obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_depth_stencil_value clear_value) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as depth stencil attachment to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].depth_stencil_attachment = rt_index;
obj->passes[i].depth_stencil_clear_value = clear_value;
obj->passes[i].depth_stencil_load = load;
obj->passes[i].depth_stencil_write = write;
return;
}
}
rtLog("ren",
"Tried to add render target %s as depth stencil attachment to unknown render target %s",
rt_name,
pass_name);
}
static void SetRenderArea(void *_obj, const char *pass_name, rt_rect2 area, float min_depth, float max_depth) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].render_area = area;
obj->passes[i].min_depth = min_depth;
obj->passes[i].max_depth = max_depth;
return;
}
}
rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
}
static void BindRenderPass(void *_obj,
const char *pass_name,
rt_execute_render_pass_fn *execute_fn,
void *userdata) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].Execute = execute_fn;
obj->passes[i].userdata = userdata;
return;
}
}
rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
}
typedef struct {
uint32_t added;
uint32_t moved;
} rt_find_writers_result;
static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
uint32_t rt_index,
uint32_t append_at,
uint32_t *p_passes) {
rt_find_writers_result res = {0, 0};
for (uint32_t i = 0; i < obj->pass_count; ++i) {
bool writes_rt = false;
if (obj->passes[i].depth_stencil_attachment == rt_index) {
writes_rt = true;
} else {
for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
if (obj->passes[i].color_attachments[j] == rt_index) {
writes_rt = true;
}
}
}
if (!writes_rt)
continue;
uint32_t lower_index = UINT32_MAX;
for (uint32_t j = 0; j < append_at; ++j) {
if (p_passes[j] == i) {
lower_index = j;
break;
}
}
if (lower_index == UINT32_MAX) {
p_passes[append_at++] = i;
res.added++;
} else {
memmove(&p_passes[lower_index],
&p_passes[lower_index + 1],
(append_at - lower_index - 1) * sizeof(uint32_t));
p_passes[append_at - 1] = i;
res.moved++;
}
}
return res;
}
static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
uint32_t search_rt,
uint32_t append_at,
uint32_t *p_order) {
rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
uint32_t new_append = append_at + writers.added;
for (uint32_t i = 0; i < writers.moved; ++i) {
uint32_t pass_idx = p_order[append_at - writers.moved + i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
}
}
for (uint32_t i = 0; i < writers.added; ++i) {
uint32_t pass_idx = p_order[append_at + i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
}
}
return new_append;
}
static rt_result
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
if (!order)
return RT_OUT_OF_MEMORY;
uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
/* Now the pass writing the backbuffer is first; we need to reverse the order */
for (uint32_t i = 0; i < count / 2; ++i) {
uint32_t t = order[i];
order[i] = order[count - i - 1];
order[count - i - 1] = t;
}
*p_order = order;
*p_count = count;
return RT_SUCCESS;
}
static uint32_t *
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
/* Our goal is to calculate a schedule that:
* A) Does not break the dependency chain
* B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
* This means that if pass A depends on pass B, we want to have as many passes in between as
* possible, to reduce the likelihood of stalls caused by A waiting for B to finish. */
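/* Illustrative example (not in the original comment): with naive order [A, B, C] where C
 * samples a target written by A, pass A and the independent pass B both score UINT32_MAX
 * (no dependencies), while C only scores scheduled_count - k = 1 once A is scheduled.
 * The loop below therefore emits A, then B, then C, placing B between the dependency and
 * its consumer. */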
uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
if (!schedule)
return NULL;
uint32_t scheduled_count = 0;
while (scheduled_count < pass_count) {
/* The number of passes remaining in naive_order */
uint32_t unscheduled_count = pass_count - scheduled_count;
/* It is always valid to use the front */
uint32_t selected_idx = 0;
uint32_t selected_score = 0;
for (uint32_t i = 0; i < unscheduled_count; ++i) {
/* Check if any dependency is not scheduled yet */
uint32_t pass_idx = naive_order[i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
uint32_t score = 0;
bool is_valid = true;
if (pass->dependency_count) {
for (uint32_t j = 0; j < unscheduled_count; ++j) {
uint32_t pass2_idx = naive_order[j];
for (uint32_t k = 0; k < pass->dependency_count; ++k) {
if (pass->dependencies[k] == pass2_idx) {
is_valid = false;
break;
}
}
if (!is_valid)
break;
}
if (!is_valid)
continue;
for (uint32_t j = 0; j < pass->dependency_count; ++j) {
for (uint32_t k = 0; k < scheduled_count; ++k) {
if (schedule[k] == pass->dependencies[j]) {
score += scheduled_count - k;
break;
}
}
}
} else {
score = UINT32_MAX;
}
if (score > selected_score) {
selected_score = score;
selected_idx = i;
}
}
schedule[scheduled_count++] = naive_order[selected_idx];
memmove(&naive_order[selected_idx],
&naive_order[selected_idx + 1],
(unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
}
return schedule;
}
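/* Worked example of the scheduling heuristic (illustrative, not from the original code):
 * given the naive order { A, B, C, D } where only D depends on A, passes with no unscheduled
 * dependencies score UINT32_MAX, while D scores by the distance of A from the end of the
 * current schedule. A is scheduled first, then B and C (score UINT32_MAX) are preferred over
 * D, so the result is { A, B, C, D } with the largest possible gap between A and its
 * dependent D, which maximizes the potential for GPU overlap. */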
static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so it may be possible to combine
     * the two. */
for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
rt_pass_build_info *pass = &obj->passes[pass_idx];
uint32_t dependency_capacity = pass->sampled_input_count;
if (dependency_capacity) {
pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
if (!pass->dependencies)
return RT_OUT_OF_MEMORY;
}
for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
uint32_t rt_index = pass->sampled_inputs[input_idx];
for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
bool is_dependency = false;
if (candidate->depth_stencil_attachment == rt_index)
is_dependency = true;
for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
if (candidate->color_attachments[j] == rt_index)
is_dependency = true;
}
if (!is_dependency)
continue;
if (pass->dependency_count == dependency_capacity) {
/* The dependencies are still on top of the arena, so we can just grow that
* array */
if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
return RT_OUT_OF_MEMORY;
dependency_capacity *= 2;
}
pass->dependencies[pass->dependency_count++] = candidate_idx;
}
}
}
return RT_SUCCESS;
}
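/* Sketch of the arena-growth trick used above (the exact arena internals are an assumption,
 * but the pattern follows the in-code comment): pass->dependencies is the most recent
 * allocation on obj->arena, so pushing another `dependency_capacity` elements extends the
 * same contiguous block:
 *
 *   uint32_t *deps = RT_ARENA_PUSH_ARRAY(&arena, uint32_t, 4);  // [d0 d1 d2 d3]
 *   RT_ARENA_PUSH_ARRAY(&arena, uint32_t, 4);                   // [d0 d1 d2 d3 .. .. .. ..]
 *
 * This only holds while nothing else has been pushed onto the arena in between. */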
static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
uint32_t pass_count,
const uint32_t *schedule) {
for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
rt->first_usage = UINT32_MAX;
rt->last_usage = 0;
for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
uint32_t pass_idx = schedule[sched_idx];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
bool usage = pass->depth_stencil_attachment == rt_idx;
if (!usage) {
for (unsigned int i = 0; i < pass->color_attachment_count; ++i) {
if (pass->color_attachments[i] == rt_idx)
usage = true;
}
}
if (!usage) {
for (unsigned int i = 0; i < pass->sampled_input_count; ++i) {
if (pass->sampled_inputs[i] == rt_idx)
usage = true;
}
}
if (usage) {
if (sched_idx < rt->first_usage)
rt->first_usage = sched_idx;
if (sched_idx > rt->last_usage)
rt->last_usage = sched_idx;
}
}
}
}
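/* Example of the computed lifetimes (illustrative; names are made up): with the schedule
 * { gbuffer, lighting, post } and a "normals" target that is written by gbuffer (slot 0) and
 * sampled by lighting (slot 1), the loop above yields first_usage = 0 and last_usage = 1,
 * i.e. the inclusive range of schedule slots during which the target must stay alive. */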
static rt_result GreedyMergeRenderTargets(rt_render_graph_builder_obj *obj) {
typedef struct {
rt_physical_render_target_info info;
int alive;
int backbuffer;
uint32_t first_usage;
uint32_t last_usage;
} merged_rts;
merged_rts *merged = RT_ARENA_PUSH_ARRAY(&obj->arena, merged_rts, 2 * obj->render_target_count);
if (!merged) {
return RT_OUT_OF_MEMORY;
}
uint32_t candidate_count = obj->render_target_count;
for (uint32_t i = 0; i < candidate_count; ++i) {
merged[i].alive = 1;
merged[i].backbuffer = (i == obj->backbuffer);
merged[i].info.format = obj->render_targets[i].format;
merged[i].info.width = obj->render_targets[i].width;
merged[i].info.height = obj->render_targets[i].height;
merged[i].info.layers = obj->render_targets[i].layers;
merged[i].info.name = obj->render_targets[i].name;
merged[i].info.samples = obj->render_targets[i].samples;
merged[i].first_usage = obj->render_targets[i].first_usage;
merged[i].last_usage = obj->render_targets[i].last_usage;
}
uint32_t *rt_mapping =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->render_target_count);
if (!rt_mapping)
return RT_OUT_OF_MEMORY;
for (uint32_t i = 0; i < obj->render_target_count; ++i)
rt_mapping[i] = i;
bool did_merge;
do {
did_merge = false;
for (uint32_t first = 0; first < candidate_count - 1; ++first) {
if (!merged[first].alive)
continue;
for (uint32_t second = first + 1; second < candidate_count; ++second) {
if (!merged[second].alive)
continue;
if (!((merged[first].last_usage < merged[second].first_usage) ||
(merged[second].last_usage < merged[first].first_usage)))
continue;
if (!(merged[first].info.width == merged[second].info.width &&
merged[first].info.height == merged[second].info.height &&
merged[first].info.samples == merged[second].info.samples &&
merged[first].info.layers == merged[second].info.layers &&
merged[first].info.format == merged[second].info.format))
continue;
merged[first].alive = 0;
merged[second].alive = 0;
merged_rts combined = {
.alive = 1,
.backbuffer = merged[first].backbuffer || merged[second].backbuffer,
.first_usage = RT_MIN(merged[first].first_usage, merged[second].first_usage),
.last_usage = RT_MAX(merged[first].last_usage, merged[second].last_usage),
.info = merged[first].info,
};
char *combined_name = rtArenaPush(&obj->arena,
strlen(merged[first].info.name) +
strlen(merged[second].info.name) + 2);
if (!combined_name)
return RT_OUT_OF_MEMORY;
strcpy(combined_name, merged[first].info.name);
strcat(combined_name, "+");
strcat(combined_name, merged[second].info.name);
combined.info.name = combined_name;
                /* Update mappings. If index < render_target_count, then it refers to a
                 * logical render target. Otherwise it refers to a merged render target. */
if (first < obj->render_target_count) {
rt_mapping[first] = candidate_count;
} else {
// Find mappings that refer to this index and update them
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (rt_mapping[i] == first)
rt_mapping[i] = candidate_count;
}
}
if (second < obj->render_target_count) {
rt_mapping[second] = candidate_count;
} else {
// Find mappings that refer to this index and update them
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (rt_mapping[i] == second)
rt_mapping[i] = candidate_count;
}
}
RT_ASSERT(candidate_count < 2 * obj->render_target_count, "");
merged[candidate_count++] = combined;
did_merge = true;
break;
}
if (did_merge)
break;
}
} while (did_merge);
uint32_t phys_count = 0;
for (uint32_t i = 0; i < candidate_count; ++i) {
if (merged[i].alive)
++phys_count;
}
obj->phys_render_targets =
RT_ARENA_PUSH_ARRAY(&obj->arena, rt_physical_render_target_info, phys_count);
if (!obj->phys_render_targets)
return RT_OUT_OF_MEMORY;
obj->phys_render_target_count = 0;
for (uint32_t i = 0; i < candidate_count; ++i) {
if (merged[i].alive) {
uint32_t index = obj->phys_render_target_count;
if (merged[i].backbuffer)
obj->backbuffer = obj->phys_render_target_count;
obj->phys_render_targets[obj->phys_render_target_count++] = merged[i].info;
/* Update the mapping table */
for (uint32_t j = 0; j < obj->render_target_count; ++j) {
if (rt_mapping[j] == i)
rt_mapping[j] = index;
}
}
}
/* Update pass render target references */
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (obj->passes[i].depth_stencil_attachment < UINT_MAX)
obj->passes[i].depth_stencil_attachment =
rt_mapping[obj->passes[i].depth_stencil_attachment];
for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j)
obj->passes[i].color_attachments[j] = rt_mapping[obj->passes[i].color_attachments[j]];
for (uint32_t j = 0; j < obj->passes[i].sampled_input_count; ++j)
obj->passes[i].sampled_inputs[j] = rt_mapping[obj->passes[i].sampled_inputs[j]];
}
obj->backbuffer = rt_mapping[obj->backbuffer];
return RT_SUCCESS;
}
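/* Illustrative merge (names and sizes are made up): two 1920x1080 RGBA8 targets "ssao_tmp"
 * (alive in slots 0..1) and "bloom_tmp" (alive in slots 2..3) have disjoint lifetimes and
 * identical dimensions, format, samples and layers, so they collapse into one physical target
 * named "ssao_tmp+bloom_tmp" alive in slots 0..3. rt_mapping then points both logical indices
 * at that single physical index, which is what the pass fix-up loop above consumes. */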
static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
RT_ASSERT(false, "Not implemented yet");
return RT_UNKNOWN_ERROR;
}
static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
const uint32_t *schedule) {
uint32_t *execution_levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
if (!execution_levels)
return NULL;
const rt_pass_build_info *passes = obj->passes;
uint32_t pass_count = obj->pass_count;
for (uint32_t i = 0; i < pass_count; ++i) {
uint32_t level = 0;
uint32_t pass_idx = schedule[i];
for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
uint32_t dep_idx = passes[pass_idx].dependencies[j];
level = RT_MAX(execution_levels[dep_idx] + 1, level);
}
execution_levels[pass_idx] = level;
}
return execution_levels;
}
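/* Example of execution levels (hypothetical passes): "shadow" and "gbuffer" have no
 * dependencies and both get level 0, so they can be recorded in parallel; "lighting", which
 * depends on both, gets level max(0, 0) + 1 = 1; a "post" pass depending on "lighting" gets
 * level 2. All passes of level N can be recorded concurrently once level N-1 has finished. */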
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj,
const uint32_t *order,
const uint32_t *execution_levels) {
size_t runtime_data_size = obj->platform_cbs.GetRuntimeDataSize();
size_t required_size = sizeof(rt_render_graph);
required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
required_size += obj->pass_count * sizeof(rt_render_pass);
required_size += obj->pass_count * runtime_data_size;
size_t pass_attachment_size = 0;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
required_size += strlen(obj->passes[i].name) + 1;
pass_attachment_size += obj->passes[i].color_attachment_count *
(sizeof(rt_render_target_handle) + sizeof(rt_color) +
sizeof(rt_pass_load_mode) + sizeof(rt_pass_write_mode));
pass_attachment_size +=
obj->passes[i].sampled_input_count * sizeof(rt_render_target_handle);
}
required_size += pass_attachment_size;
rt_render_graph *graph = rtAllocBuffer(required_size);
if (!graph)
return NULL;
memset(graph, 0, required_size);
graph->render_targets = (rt_render_target_handle *)(graph + 1);
graph->semaphores =
(rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
char *attachment_storage = (char *)(graph->passes + obj->pass_count);
char *runtime_data = attachment_storage + pass_attachment_size;
char *names = runtime_data + runtime_data_size * obj->pass_count;
char *next_name = names;
graph->render_target_count = obj->phys_render_target_count;
graph->semaphore_count = obj->sync_point_count;
graph->pass_count = obj->pass_count;
for (uint32_t i = 0; i < obj->phys_render_target_count; ++i) {
graph->render_targets[i] =
obj->platform_cbs.CreateRenderTarget(&obj->phys_render_targets[i]);
}
for (uint32_t i = 0; i < obj->sync_point_count; ++i) {
// TODO
RT_NOT_IMPLEMENTED;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
uint32_t passidx = order[i];
size_t namelen = strlen(obj->passes[passidx].name);
graph->passes[i].Execute = RT_VERIFY(obj->passes[passidx].Execute);
graph->passes[i].user_data = obj->passes[passidx].userdata;
graph->passes[i].flags = obj->passes[passidx].flags;
graph->passes[i].id = rtCalculateRenderPassID(obj->passes[passidx].name, namelen);
graph->passes[i].first_signal = 0;
graph->passes[i].signal_count = 0;
graph->passes[i].first_wait = 0;
graph->passes[i].wait_count = 0;
graph->passes[i].execution_level = execution_levels[passidx];
graph->passes[i].render_area = obj->passes[passidx].render_area;
graph->passes[i].min_depth = obj->passes[passidx].min_depth;
graph->passes[i].max_depth = obj->passes[passidx].max_depth;
        graph->passes[i].depth_stencil =
            (obj->passes[passidx].depth_stencil_attachment != UINT_MAX)
                ? graph->render_targets[obj->passes[passidx].depth_stencil_attachment]
                : (rt_render_target_handle)RT_INVALID_HANDLE;
        graph->passes[i].depth_stencil_clear_value =
            obj->passes[passidx].depth_stencil_clear_value;
        graph->passes[i].depth_stencil_load = obj->passes[passidx].depth_stencil_load;
        graph->passes[i].depth_stencil_write = obj->passes[passidx].depth_stencil_write;
        graph->passes[i].color_output_count = obj->passes[passidx].color_attachment_count;
if (graph->passes[i].color_output_count) {
graph->passes[i].color_outputs = (rt_render_target_handle *)attachment_storage;
attachment_storage +=
sizeof(rt_render_target_handle) * graph->passes[i].color_output_count;
graph->passes[i].color_clear_values = (rt_color *)attachment_storage;
attachment_storage += sizeof(rt_color) * graph->passes[i].color_output_count;
graph->passes[i].color_loads = (rt_pass_load_mode *)attachment_storage;
attachment_storage += sizeof(rt_pass_load_mode) * graph->passes[i].color_output_count;
graph->passes[i].color_writes = (rt_pass_write_mode *)attachment_storage;
attachment_storage += sizeof(rt_pass_write_mode) * graph->passes[i].color_output_count;
for (uint32_t j = 0; j < graph->passes[i].color_output_count; ++j) {
                graph->passes[i].color_outputs[j] =
                    graph->render_targets[obj->passes[passidx].color_attachments[j]];
                graph->passes[i].color_clear_values[j] =
                    obj->passes[passidx].color_attachment_clear_values[j];
                graph->passes[i].color_loads[j] = obj->passes[passidx].color_attachment_loads[j];
                graph->passes[i].color_writes[j] = obj->passes[passidx].color_attachment_writes[j];
}
}
        graph->passes[i].sampled_input_count = obj->passes[passidx].sampled_input_count;
if (graph->passes[i].sampled_input_count) {
graph->passes[i].sampled_inputs = (rt_render_target_handle *)attachment_storage;
attachment_storage +=
sizeof(rt_render_target_handle) * graph->passes[i].sampled_input_count;
for (uint32_t j = 0; j < graph->passes[i].sampled_input_count; ++j) {
                graph->passes[i].sampled_inputs[j] =
                    graph->render_targets[obj->passes[passidx].sampled_inputs[j]];
}
}
graph->passes[i].runtime_data = (void *)(runtime_data + i * runtime_data_size);
graph->passes[i].name = next_name;
next_name += namelen + 1;
memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1);
}
graph->backbuffer_index = obj->backbuffer;
return graph;
}
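/* Layout of the single allocation produced above (sizes depend on the build info):
 *
 *   [rt_render_graph][render_targets][semaphores][passes]
 *   [per-pass attachment arrays: outputs, clear values, loads, writes, sampled inputs]
 *   [per-pass runtime data][pass name strings]
 *
 * Everything lives in one rtAllocBuffer block, so the whole graph can be released at once. */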
static rt_result Build(void *_obj, rt_render_graph **p_graph) {
rt_render_graph_builder_obj *obj = _obj;
uint32_t *naive_order;
uint32_t pass_count;
rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
if (res != RT_SUCCESS)
return res;
res = DeterminePassDependencies(obj);
if (res != RT_SUCCESS)
return res;
uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
if (!optimized_order)
return RT_OUT_OF_MEMORY;
DetermineRenderTargetUsage(obj, pass_count, optimized_order);
res = GreedyMergeRenderTargets(obj);
if (res != RT_SUCCESS)
return res;
uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
if (!execution_levels)
return RT_OUT_OF_MEMORY;
if (obj->platform_cbs.RequireExplicitSynchronization()) {
res = CreateSynchronizationPoints(obj);
if (res != RT_SUCCESS)
return res;
} else {
obj->sync_point_count = 0;
}
*p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
}
rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs) {
// TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions
rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
RT_ASSERT(obj, "Failed to allocate the builder object.");
memset(obj, 0, sizeof(*obj));
rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
RT_ASSERT(arena_res.ok, "");
obj->arena = arena_res.arena;
obj->platform_cbs = *platform_cbs;
return (rt_render_graph_builder){
.obj = obj,
.AddRenderTarget = AddRenderTarget,
.SetBackbuffer = SetBackbuffer,
.AddRenderPass = AddRenderPass,
.AddColorOutput = AddColorOutput,
.AddSampledInput = AddSampledInput,
.SetDepthStencilAttachment = SetDepthStencilAttachment,
.SetRenderArea = SetRenderArea,
.BindRenderPass = BindRenderPass,
.Build = Build,
};
}
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder) {
rt_render_graph_builder_obj *obj = builder->obj;
rtReleaseArena(&obj->arena);
free(obj);
memset(builder, 0, sizeof(*builder));
}
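/* Minimal usage sketch (an illustration, not from the original code; only the BindRenderPass and
 * Build signatures are taken from this file, the rest of the call sequence is assumed):
 *
 *   rt_render_graph_builder b = rtCreateRenderGraphBuilder(&platform_cbs);
 *   // declare render targets and passes via b.AddRenderTarget / b.AddRenderPass /
 *   // b.AddColorOutput / b.AddSampledInput, mark the final target with b.SetBackbuffer,
 *   // then bind the execution callback and build:
 *   b.BindRenderPass(b.obj, "forward", MyExecuteForwardPass, my_userdata);
 *   rt_render_graph *graph = NULL;
 *   rt_result res = b.Build(b.obj, &graph);
 *   rtDestroyRenderGraphBuilder(&b);
 *
 * "forward", MyExecuteForwardPass and my_userdata are placeholder names. */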

View File

@ -1,99 +0,0 @@
#ifndef RT_RENDERER_COMMON_RENDER_GRAPH_H
#define RT_RENDERER_COMMON_RENDER_GRAPH_H
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
typedef struct {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
} rt_physical_render_target_info;
typedef rt_render_target_handle
rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info);
typedef int rt_rgb_require_explicit_synchronization_fn(void);
typedef size_t rt_rgb_get_runtime_data_size_fn(void);
typedef struct {
rt_rgb_create_render_target_fn *CreateRenderTarget;
rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization;
rt_rgb_get_runtime_data_size_fn *GetRuntimeDataSize;
} rt_render_graph_builder_platform_callbacks;
typedef struct {
uint32_t flags;
/* Used for cheap referencing */
uint32_t id;
/* Used for debug output */
const char *name;
/* Viewport info */
rt_rect2 render_area;
float min_depth;
float max_depth;
/* Render targets */
rt_render_target_handle *color_outputs;
rt_color *color_clear_values;
rt_pass_load_mode *color_loads;
rt_pass_write_mode *color_writes;
uint32_t color_output_count;
rt_render_target_handle depth_stencil;
rt_depth_stencil_value depth_stencil_clear_value;
rt_pass_load_mode depth_stencil_load;
rt_pass_write_mode depth_stencil_write;
rt_render_target_handle *sampled_inputs;
uint32_t sampled_input_count;
/* Used for parallelisation on the CPU-side. All passes with execution level N can
* be recorded in parallel, after passes with level N-1 have finished. */
uint32_t execution_level;
/* GFX layer function for executing the pass */
rt_execute_render_pass_fn *Execute;
void *user_data;
/* Allocated by the backend, used during runtime */
void *runtime_data;
/* These refer to the semaphores array */
uint32_t first_wait;
uint32_t wait_count;
uint32_t first_signal;
uint32_t signal_count;
} rt_render_pass;
struct rt_render_graph_s {
rt_render_target_handle *render_targets;
uint32_t render_target_count;
rt_gpu_semaphore_handle *semaphores;
uint32_t semaphore_count;
rt_render_pass *passes;
uint32_t pass_count;
uint32_t backbuffer_index;
};
#ifdef __cplusplus
extern "C" {
#endif
rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs);
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,146 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxBuffers,
"Maximum number of simultaneously existing buffers. Default: 4096",
4096);
static rt_buffer *_buffers;
static rt_buffer *_first_free;
static rt_mutex *_lock;
rt_result InitBufferManagement() {
_buffers =
reinterpret_cast<rt_buffer *>(calloc((size_t)rt_Dx11MaxBuffers.i, sizeof(rt_buffer)));
if (!_buffers) {
return RT_OUT_OF_MEMORY;
}
_lock = rtCreateMutex();
if (!_lock) {
free(_buffers);
return RT_UNKNOWN_ERROR;
}
_first_free = _buffers + 2;
    /* The last slot keeps the NULL next_free it got from calloc, terminating the free list. */
    for (int i = 0; i < rt_Dx11MaxBuffers.i - 1; ++i) {
        _buffers[i].next_free = &_buffers[i + 1];
    }
return RT_SUCCESS;
}
void ShutdownBufferManagement() {
for (int i = 0; i < rt_Dx11MaxBuffers.i; ++i) {
if (_buffers[i].buffer)
_buffers[i].buffer->Release();
}
free(_buffers);
rtDestroyMutex(_lock);
}
rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxBuffers.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _buffers[handle.index].version)
return nullptr;
return &_buffers[handle.index];
}
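/* Note on the handle scheme used throughout this backend (descriptive comment, added for
 * clarity): a handle stores { version, index }. A slot's version is advanced whenever the
 * slot is (re)used for a new buffer, so a stale handle whose version no longer matches the
 * slot is rejected above and yields nullptr instead of aliasing a recycled buffer. */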
extern "C" rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_buffer *slot = _first_free;
if (slot)
_first_free = slot->next_free;
rtUnlockMutex(_lock);
if (!slot) {
rtLog("dx11", "Failed to allocate a command buffer slot.");
rtLockMutex(_lock);
            for (uint32_t j = 0; j < i; ++j) {
                rt_buffer *s = &_buffers[p_buffers[j].index];
                s->next_free = _first_free;
                _first_free = s;
            }
rtUnlockMutex(_lock);
return RT_OUT_OF_MEMORY;
}
D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
if (info[i].usage == RT_BUFFER_USAGE_STATIC) {
usage = D3D11_USAGE_IMMUTABLE;
} else if (info[i].usage == RT_BUFFER_USAGE_DYNAMIC) {
usage = D3D11_USAGE_DEFAULT;
} else if (info[i].usage == RT_BUFFER_USAGE_TRANSIENT) {
usage = D3D11_USAGE_DYNAMIC;
}
UINT bind_flags = D3D11_BIND_UNORDERED_ACCESS;
if (info[i].type == RT_BUFFER_TYPE_VERTEX)
bind_flags = D3D11_BIND_VERTEX_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_INDEX)
bind_flags = D3D11_BIND_INDEX_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_UNIFORM)
bind_flags = D3D11_BIND_CONSTANT_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_STORAGE)
bind_flags = D3D11_BIND_UNORDERED_ACCESS;
D3D11_BUFFER_DESC desc = {};
desc.ByteWidth = static_cast<UINT>(((info[i].size + 15) / 16) * 16);
desc.Usage = usage;
desc.BindFlags = bind_flags;
desc.CPUAccessFlags = 0;
desc.MiscFlags = 0;
desc.StructureByteStride = 1;
D3D11_SUBRESOURCE_DATA data;
        data.pSysMem = info[i].data;
data.SysMemPitch = 0;
data.SysMemSlicePitch = 0;
if (FAILED(
g_gpu.device->CreateBuffer(&desc, info[i].data ? &data : nullptr, &slot->buffer))) {
rtLog("dx11", "Failed to create a deferred context.");
auto lock_guard = rtAutoLock(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_buffer *s = &_buffers[p_buffers[j].index];
s->next_free = _first_free;
_first_free = s;
}
return RT_UNKNOWN_ERROR;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
const uint32_t index = (uint32_t)(slot - _buffers);
p_buffers[i].version = slot->version;
p_buffers[i].index = index;
}
return RT_SUCCESS;
}
extern "C" void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
for (uint32_t i = 0; i < count; ++i) {
if (!RT_IS_HANDLE_VALID(buffers[i]) || (int)buffers[i].index >= rt_Dx11MaxBuffers.i)
continue;
auto lg = rtAutoLock(_lock);
if (buffers[i].version != _buffers[buffers[i].index].version)
continue;
_buffers[buffers[i].index].buffer->Release();
_buffers[buffers[i].index].next_free = _first_free;
_first_free = &_buffers[buffers[i].index];
}
}

View File

@ -1,148 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxCommandBuffers,
"Maximum number of simultaneously created command buffers. Default: 1024",
1024);
static rt_command_buffer *_buffers;
static rt_command_buffer *_first_free;
static rt_mutex *_lock;
rt_result InitCommandBufferManagement() {
_buffers = reinterpret_cast<rt_command_buffer *>(
calloc((size_t)rt_Dx11MaxCommandBuffers.i, sizeof(rt_command_buffer)));
if (!_buffers)
return RT_OUT_OF_MEMORY;
_first_free = &_buffers[1];
_lock = rtCreateMutex();
if (!_lock) {
free(_buffers);
return RT_UNKNOWN_ERROR;
}
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i - 1; ++i) {
_buffers[i].next_free = &_buffers[i + 1];
}
return RT_SUCCESS;
}
void ShutdownCommandBufferManagement() {
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i; ++i) {
if (_buffers[i].context)
_buffers[i].context->Release();
}
free(_buffers);
_buffers = nullptr;
}
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_command_buffer *slot = _first_free;
if (slot)
_first_free = slot->next_free;
rtUnlockMutex(_lock);
if (!slot) {
rtLog("dx11", "Failed to allocate a command buffer slot.");
rtLockMutex(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_command_buffer *s = &_buffers[p_handles[j].index];
s->next_free = _first_free;
_first_free = s;
}
rtUnlockMutex(_lock);
return RT_OUT_OF_MEMORY;
}
if (!slot->context) {
if (FAILED(g_gpu.device->CreateDeferredContext1(0, &slot->context))) {
rtLog("dx11", "Failed to create a deferred context.");
auto lock_guard = rtAutoLock(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_command_buffer *s = &_buffers[p_handles[j].index];
s->next_free = _first_free;
_first_free = s;
}
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
if (FAILED(slot->context->QueryInterface(IID_PPV_ARGS(&slot->annotation)))) {
rtLog("dx11", "Failed to retrieve the annotation interface.");
slot->annotation = nullptr;
}
#endif
} else {
slot->context->ClearState();
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
const uint32_t index = (uint32_t)(slot - _buffers);
p_handles[i].version = slot->version;
p_handles[i].index = index;
}
return RT_SUCCESS;
}
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles) {
// TODO: Handle semaphores
// Submit the command lists to the gpu
for (uint32_t i = 0; i < count; ++i) {
rt_command_buffer *cmdbuf = &_buffers[handles[i].index];
if (cmdbuf->version != handles[i].version) {
rtLog("dx11", "Tried to submit an invalid command buffer (version mismatch)");
return RT_INVALID_VALUE;
}
ID3D11CommandList *cmdlist;
if (FAILED(cmdbuf->context->FinishCommandList(FALSE, &cmdlist))) {
rtLog("dx11", "FinishCommandList failed");
return RT_UNKNOWN_ERROR;
}
rtLockMutex(g_gpu.context_lock);
g_gpu.device_context->ExecuteCommandList(cmdlist, FALSE);
rtUnlockMutex(g_gpu.context_lock);
rtLockMutex(_lock);
cmdbuf->next_free = _first_free;
_first_free = cmdbuf;
rtUnlockMutex(_lock);
}
return RT_SUCCESS;
}
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxCommandBuffers.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _buffers[handle.index].version)
return nullptr;
return &_buffers[handle.index];
}
extern "C" rt_result
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *,
rt_command_buffer_handle *p_command_buffers) {
return rtAllocCommandBuffers(count, p_command_buffers);
}
extern "C" rt_result
RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) {
return rtSubmitCommandBuffers(info->command_buffer_count, info->command_buffers);
}

View File

@ -1,167 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
#include "device_objects.hpp"
#include "gpu.hpp"
extern "C" void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdhandle,
const rt_cmd_begin_pass_info *info) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
if (cmd->annotation) {
WCHAR wname[128];
if (rtUTF8ToWStr(info->name, wname, sizeof(wname)) == RT_SUCCESS)
cmd->annotation->BeginEvent(wname);
}
// Setup rtvs
ID3D11RenderTargetView *rtvs[4];
ID3D11DepthStencilView *dsv = nullptr;
for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
if (!RT_VERIFY(rt))
return;
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
rtvs[i] = rt->rtv;
if (info->color_buffer_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
FLOAT color[4] = {
info->color_buffer_clear_values[i].color.r,
info->color_buffer_clear_values[i].color.g,
info->color_buffer_clear_values[i].color.b,
info->color_buffer_clear_values[i].color.a,
};
cmd->context->ClearRenderTargetView(rt->rtv, color);
}
}
rt_render_target *dsvrt = rtGetRenderTarget(info->depth_stencil_buffer);
if (dsvrt) {
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
"Need to provide a valid depth stencil render target");
dsv = dsvrt->dsv;
if (info->depth_stencil_buffer_load == RT_PASS_LOAD_MODE_CLEAR)
cmd->context->ClearDepthStencilView(
dsv,
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
: D3D11_CLEAR_DEPTH,
info->depth_stencil_buffer_clear_value.depth_stencil.depth,
static_cast<UINT8>(info->depth_stencil_buffer_clear_value.depth_stencil.stencil));
}
cmd->context->OMSetRenderTargets(static_cast<UINT>(info->color_buffer_count), rtvs, dsv);
D3D11_VIEWPORT viewport;
viewport.TopLeftX = static_cast<float>(info->render_area.offset.x);
viewport.TopLeftY = static_cast<float>(info->render_area.offset.y);
viewport.Width = static_cast<float>(info->render_area.size.x);
viewport.Height = static_cast<float>(info->render_area.size.y);
viewport.MinDepth = 0.f;
viewport.MaxDepth = 1.f;
cmd->context->RSSetViewports(1, &viewport);
// We currently only support triangles, so here is a good place to set this
cmd->context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
}
extern "C" void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdhandle) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
if (cmd->annotation) {
cmd->annotation->EndEvent();
}
}
extern "C" void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdhandle,
rt_render_target_handle target,
rt_render_target_state state) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
RT_UNUSED(target);
RT_UNUSED(state);
}
extern "C" void
RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdhandle,
rt_render_target_handle render_target) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
RT_UNUSED(render_target);
}
extern "C" void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
rt_pipeline_handle pipeline_handle) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
rt_pipeline *pipeline = rtGetPipeline(pipeline_handle);
if (pipeline->IsComputePipeline()) {
rtReportError("dx11",
"Attempted to bind a compute pipeline via CmdBindPipeline. Use "
"CmdBindComputePipeline instead.");
return;
}
auto context = cmd->context;
context->IASetInputLayout(pipeline->input_layout);
context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
context->VSSetShader(pipeline->vertex_shader, nullptr, 0);
context->PSSetShader(pipeline->pixel_shader, nullptr, 0);
context->RSSetState(pipeline->rasterizer_state);
}
extern "C" void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
uint32_t first_binding,
uint32_t count,
const rt_buffer_handle *buffers,
const uint32_t *_strides,
const uint32_t *_offsets) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return;
ID3D11Buffer **vbos = RT_ARENA_PUSH_ARRAY(temp.arena, ID3D11Buffer *, count);
static_assert(sizeof(UINT) == sizeof(uint32_t));
const UINT *offsets = _offsets;
const UINT *strides = _strides;
if (!vbos || !strides)
goto out;
if (!offsets) {
offsets = RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, UINT, count);
}
for (uint32_t i = 0; i < count; ++i) {
rt_buffer *buffer = rtGetBuffer(buffers[i]);
RT_ASSERT(buffer->type == RT_BUFFER_TYPE_VERTEX, "Buffer must be a vertex buffer");
vbos[i] = buffer->buffer;
}
cmd->context->IASetVertexBuffers(first_binding, count, vbos, strides, offsets);
out:
rtReturnTemporaryArena(temp);
}
extern "C" void
RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, uint32_t first, uint32_t count) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
cmd->context->Draw(count, first);
}

View File

@ -1,95 +0,0 @@
#ifndef RT_DX11_DEVICE_OBJECTS_HPP
#define RT_DX11_DEVICE_OBJECTS_HPP
// Types containing various api objects
#include <stdint.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include "runtime/runtime.h"
struct rt_render_target {
// Only one of these should be valid
ID3D11RenderTargetView *rtv;
ID3D11DepthStencilView *dsv;
ID3D11Texture2D *texture;
rt_pixel_format format;
uint32_t version;
rt_render_target *next_free;
RT_INLINE bool HasStencilComponent() const {
return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8;
}
RT_INLINE bool IsColorRenderTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
return rtv != nullptr;
}
RT_INLINE bool IsDepthStencilTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
return dsv != nullptr;
}
};
struct rt_command_buffer {
// Only created once and then re-used.
ID3D11DeviceContext1 *context;
ID3DUserDefinedAnnotation *annotation;
uint32_t version;
rt_command_buffer *next_free;
};
struct rt_buffer {
ID3D11Buffer *buffer;
rt_buffer_type type;
rt_buffer_usage usage;
uint32_t version;
rt_buffer *next_free;
};
struct rt_pipeline {
ID3D11InputLayout *input_layout;
ID3D11VertexShader *vertex_shader;
ID3D11PixelShader *pixel_shader;
ID3D11ComputeShader *compute_shader;
ID3D11RasterizerState *rasterizer_state;
rt_pipeline *next_free;
uint32_t version;
RT_INLINE bool IsComputePipeline() const {
RT_ASSERT(!(compute_shader && (vertex_shader || pixel_shader)),
"A pipeline should contain either a compute shader or graphics shaders.");
return compute_shader != nullptr;
}
};
struct rt_render_target_create_info {
rt_pixel_format format;
uint32_t width;
uint32_t height;
const char *name;
};
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info);
void rtDestroyRenderTarget(rt_render_target_handle handle);
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles);
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles);
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle);
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);
#endif

View File

@ -1,43 +0,0 @@
#ifndef RT_DX11_GPU_HPP
#define RT_DX11_GPU_HPP
#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#define RT_DX11_MAX_FRAMES_IN_FLIGHT 2
// Smart pointer for COM-Objects
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;
struct rt_swap_chain {
ComPtr<IDXGISwapChain1> swap_chain;
ComPtr<ID3D11RenderTargetView> rtv;
};
struct rt_gpu {
ComPtr<ID3D11Device1> device;
ComPtr<ID3D11DeviceContext1> device_context;
ComPtr<IDXGIFactory2> dxgi_factory;
rt_swap_chain swap_chain;
rt_mutex *context_lock;
D3D_FEATURE_LEVEL feature_level;
D3D11_FEATURE_DATA_THREADING threading_support;
};
#ifndef DONT_DEFINE_GPU_GLOBAL
extern rt_gpu g_gpu;
#endif
DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format);
#endif

View File

@ -1,35 +0,0 @@
#include "gpu.hpp"
DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format) {
switch (format) {
case RT_PIXEL_FORMAT_INVALID:
return DXGI_FORMAT_UNKNOWN;
case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
return DXGI_FORMAT_B8G8R8A8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_R8G8B8_UNORM:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8_UNORM:
return DXGI_FORMAT_B8G8R8X8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8_SRGB:
return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_B8G8R8_SRGB:
return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB;
case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
return DXGI_FORMAT_D24_UNORM_S8_UINT;
case RT_PIXEL_FORMAT_DEPTH32:
return DXGI_FORMAT_D32_FLOAT;
case RT_PIXEL_FORMAT_SWAPCHAIN:
return DXGI_FORMAT_B8G8R8A8_UNORM;
default:
return DXGI_FORMAT_UNKNOWN;
}
}

View File

@ -1,288 +0,0 @@
#ifndef _WIN32
#warning Building DX11 on non-Windows is probably a mistake
#endif
#include <d3d11.h>
#include <dxgi1_3.h>
#include <wrl.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#define DONT_DEFINE_GPU_GLOBAL
#include "gpu.hpp"
RT_CVAR_S(
rt_Dx11AdapterName,
"Name of the adapter that should be used for device creation. Default: \"\" (Use default)",
"");
RT_CVAR_I(rt_Dx11VSync, "Enable vsync. Default: 1", 1);
RT_CVAR_I(rt_Dx11MaxSubmittedCommandBuffers,
"Maximum number of submitted command buffers per frame. Default: 1024",
1024);
extern rt_cvar rt_Dx11MaxCommandBuffers;
rt_gpu g_gpu;
extern "C" void RT_RENDERER_API_FN(RegisterCVars)(void) {
rtRegisterCVAR(&rt_Dx11AdapterName);
rtRegisterCVAR(&rt_Dx11VSync);
rtRegisterCVAR(&rt_Dx11MaxCommandBuffers);
}
static rt_swap_chain CreateSwapChain(HWND hwnd) {
rt_swap_chain swc;
DXGI_SWAP_CHAIN_DESC1 desc;
desc.Width = 0; // use window width
desc.Height = 0; // use window height
desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; // can't specify _SRGB here when using
// DXGI_SWAP_EFFECT_FLIP_* ...;
desc.Stereo = FALSE;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.BufferCount = 2;
desc.Scaling = DXGI_SCALING_STRETCH;
desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
desc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
desc.Flags = 0;
if (FAILED(g_gpu.dxgi_factory->CreateSwapChainForHwnd(g_gpu.device.Get(),
hwnd,
&desc,
nullptr,
nullptr,
&swc.swap_chain))) {
rtReportError("dx11", "Failed to create the swap chain.");
return swc;
}
ID3D11Texture2D *frame_buffer;
if (FAILED(swc.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
rtReportError("dx11", "Failed to retrieve the backbuffer.");
swc.swap_chain.Reset();
return swc;
}
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
rtv_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
if (FAILED(g_gpu.device->CreateRenderTargetView(frame_buffer, &rtv_desc, &swc.rtv))) {
rtReportError("dx11", "Failed to create the render target view for the backbuffer.");
swc.swap_chain.Reset();
return swc;
}
return swc;
}
static IDXGIAdapter *RetrieveSelectedAdapter(void) {
ComPtr<IDXGIFactory2> factory;
if (FAILED(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)))) {
return NULL;
}
UINT i = 0;
IDXGIAdapter *adapter;
while (factory->EnumAdapters(i, &adapter) == S_OK) {
++i;
DXGI_ADAPTER_DESC desc;
adapter->GetDesc(&desc);
char utf8_desc[256];
rtWStrToUTF8(desc.Description, utf8_desc, 256);
        if (strncmp(utf8_desc, rt_Dx11AdapterName.s, 256) == 0)
            return adapter;
        adapter->Release();
    }
return NULL;
}
extern rt_result InitCommandBufferManagement();
extern void ShutdownCommandBufferManagement();
extern rt_result InitRenderTargetManagement();
extern void ShutdownRenderTargetManagement();
extern rt_result InitBufferManagement();
extern void ShutdownBufferManagement();
extern rt_result InitPipelineManagement();
extern void ShutdownPipelineManagement();
extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0};
UINT device_flags = 0;
#ifdef RT_DEBUG
device_flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
IDXGIAdapter *selected_adapter = RetrieveSelectedAdapter();
ID3D11Device *base_device;
ID3D11DeviceContext *base_context;
if (FAILED(D3D11CreateDevice(selected_adapter,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
device_flags,
feature_levels,
RT_ARRAY_COUNT(feature_levels),
D3D11_SDK_VERSION,
&base_device,
&g_gpu.feature_level,
&base_context))) {
rtLog("dx11", "Feature level 11.1 creation failed, retrying with feature level 11.0");
if (FAILED(D3D11CreateDevice(selected_adapter,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
device_flags,
&feature_levels[1],
RT_ARRAY_COUNT(feature_levels) - 1,
D3D11_SDK_VERSION,
&base_device,
&g_gpu.feature_level,
&base_context))) {
rtReportError("dx11", "Failed to create the d3d11 device.");
return RT_UNKNOWN_ERROR;
}
}
if (FAILED(base_device->QueryInterface(IID_PPV_ARGS(&g_gpu.device)))) {
rtReportError("dx11", "Failed to query the D3D11Device1 interface.");
return RT_UNKNOWN_ERROR;
}
if (FAILED(base_context->QueryInterface(IID_PPV_ARGS(&g_gpu.device_context)))) {
rtReportError("dx11", "Failed to query the D3D11DeviceContext1 interface.");
return RT_UNKNOWN_ERROR;
}
IDXGIDevice1 *dxgi_device;
if (FAILED(g_gpu.device->QueryInterface(&dxgi_device))) {
rtReportError("dx11", "Failed to query the DXGIDevice1 interface.");
return RT_UNKNOWN_ERROR;
}
IDXGIAdapter *adapter;
if (FAILED(dxgi_device->GetAdapter(&adapter))) {
rtReportError("dx11", "Failed to retrieve the dxgi adapter.");
return RT_UNKNOWN_ERROR;
}
if (FAILED(adapter->GetParent(IID_PPV_ARGS(&g_gpu.dxgi_factory)))) {
rtReportError("dx11", "Failed to retrieve the dxgi factory.");
return RT_UNKNOWN_ERROR;
}
g_gpu.device->CheckFeatureSupport(D3D11_FEATURE_THREADING,
&g_gpu.threading_support,
sizeof(g_gpu.threading_support));
g_gpu.swap_chain = CreateSwapChain(info->hWnd);
g_gpu.context_lock = rtCreateMutex();
rt_result res = InitCommandBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitPipelineManagement();
if (res != RT_SUCCESS)
return res;
return RT_SUCCESS;
}
extern "C" void RT_RENDERER_API_FN(Shutdown)(void) {
ShutdownPipelineManagement();
ShutdownBufferManagement();
ShutdownRenderTargetManagement();
ShutdownCommandBufferManagement();
rtDestroyMutex(g_gpu.context_lock);
g_gpu.swap_chain.rtv.Reset();
g_gpu.swap_chain.swap_chain.Reset();
g_gpu.dxgi_factory.Reset();
g_gpu.device.Reset();
}
extern "C" unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return RT_DX11_MAX_FRAMES_IN_FLIGHT;
}
extern "C" void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
FLOAT clear_color[4] = {
0,
0,
0,
0,
};
rtLockMutex(g_gpu.context_lock);
g_gpu.device_context->ClearRenderTargetView(g_gpu.swap_chain.rtv.Get(), clear_color);
rtUnlockMutex(g_gpu.context_lock);
}
extern "C" void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
rtLockMutex(g_gpu.context_lock);
UINT sync_interval = rt_Dx11VSync.i ? 1 : 0;
g_gpu.swap_chain.swap_chain->Present(sync_interval, 0);
rtUnlockMutex(g_gpu.context_lock);
}
// Copied from null. Delete once no longer needed
extern "C" {
#define RETURN_HANDLE_STUB2(type, initial) \
static unsigned int s_next = (initial); \
s_next = (s_next + 1) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
type h = { \
1, \
s_next, \
}; \
return h;
#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)
#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial) \
static unsigned int s_next = (initial); \
for (uint32_t i = 0; i < (count); ++i) { \
(out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
(out)[i].version = 1; \
}
#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
const rt_gpu_semaphore_info *info,
rt_gpu_semaphore_handle *p_semaphores) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB2(p_semaphores, count, 3)
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
RT_UNUSED(count);
RT_UNUSED(semaphores);
}
/* NOTE(Kevin): It might become necessary to actually track the value, to correctly simulate gpu
* behaviour */
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle sem) {
RT_UNUSED(sem);
return 0;
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
return {1, 1};
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
return {1, 2};
}
}

View File

@ -1,31 +0,0 @@
if get_option('build_dx11')
dx11_dep = declare_dependency(link_args: ['-ld3d11', '-ldxgi', '-lwinmm', '-ldxguid'])
dx11_renderer_lib = library('rtdx11',
# Project Sources
'device_objects.hpp',
'gpu.hpp',
'../common/common_render_graph.h',
'buffers.cpp',
'commands.cpp',
'command_buffers.cpp',
'helpers.cpp',
'init.cpp',
'pipelines.cpp',
'render_graph.cpp',
'render_targets.cpp',
'../common/common_render_graph.c',
dependencies : [m_dep, windowing_dep, dx11_dep],
include_directories : [engine_incdir, contrib_incdir],
link_with : [runtime_lib],
cpp_pch : 'pch/dx11_pch.h',
override_options : ['b_sanitize=none'],
install : true)
engine_libs += dx11_renderer_lib
engine_lib_paths += dx11_renderer_lib.full_path()
endif

View File

@ -1,5 +0,0 @@
// DX11 headers
#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>

View File

@ -1,238 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/effect.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxPipelines,
"Maximum number of simultaneously existing pipelines. Default: 128",
128);
static rt_pipeline *_pipelines;
static rt_pipeline *_first_free;
static rt_mutex *_lock;
rt_result InitPipelineManagement() {
_pipelines =
reinterpret_cast<rt_pipeline *>(calloc((size_t)rt_Dx11MaxPipelines.i, sizeof(rt_pipeline)));
if (!_pipelines)
return RT_OUT_OF_MEMORY;
_first_free = _pipelines + 1;
for (int i = 0; i < rt_Dx11MaxPipelines.i - 1; ++i)
_pipelines[i].next_free = &_pipelines[i + 1];
_lock = rtCreateMutex();
if (!_lock) {
free(_pipelines);
return RT_UNKNOWN_ERROR;
}
return RT_SUCCESS;
}
void ShutdownPipelineManagement() {
for (int i = 0; i < rt_Dx11MaxPipelines.i; ++i) {
if (_pipelines[i].compute_shader)
_pipelines[i].compute_shader->Release();
if (_pipelines[i].vertex_shader)
_pipelines[i].vertex_shader->Release();
if (_pipelines[i].pixel_shader)
_pipelines[i].pixel_shader->Release();
if (_pipelines[i].input_layout)
_pipelines[i].input_layout->Release();
}
free(_pipelines);
rtDestroyMutex(_lock);
}
rt_result GetShader(rt_resource_id id, rt_shader_info **p_shader, rt_arena *arena) {
size_t shader_size = rtGetResourceSize(id);
if (shader_size == 0)
return RT_INVALID_VALUE;
void *buffer = rtArenaPush(arena, shader_size);
if (!buffer)
return RT_OUT_OF_MEMORY;
rt_result res = rtGetResource(id, buffer);
if (res != RT_SUCCESS) {
rtArenaPop(arena, shader_size);
return res;
}
rt_resource *resource = reinterpret_cast<rt_resource *>(buffer);
RT_ASSERT(resource->type == RT_RESOURCE_SHADER, "Expected a shader");
*p_shader = reinterpret_cast<rt_shader_info *>(resource->data);
return RT_SUCCESS;
}
extern "C" rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
rt_pipeline *slot = nullptr;
{
auto lg = rtAutoLock(_lock);
slot = _first_free;
if (slot)
_first_free = slot->next_free;
}
if (!slot) {
rtLog("dx11", "Could not create pipeline, because no slots are available.");
return RT_INVALID_HANDLE;
}
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (info->vertex_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->vertex_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve vertex shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreateVertexShader(bytecode,
vs->bytecode_length,
NULL,
&slot->vertex_shader))) {
rtReportError("dx11", "Vertex shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
// TODO: effects should specify the expected vertex layout
// For now, we use a default
/* clang-format off */
D3D11_INPUT_ELEMENT_DESC default_layout[] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
};
/* clang-format on */
if (FAILED(g_gpu.device->CreateInputLayout(default_layout,
RT_ARRAY_COUNT(default_layout),
bytecode,
vs->bytecode_length,
&slot->input_layout))) {
rtReportError("dx11", "Failed to create the vertex layout.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
if (info->fragment_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->fragment_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve fragment shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreatePixelShader(bytecode,
vs->bytecode_length,
NULL,
&slot->pixel_shader))) {
rtReportError("dx11", "Fragment shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
if (info->compute_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->compute_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve compute shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreateComputeShader(bytecode,
vs->bytecode_length,
NULL,
&slot->compute_shader))) {
rtReportError("dx11", "Compute shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
    // TODO: Effects should specify the rasterizer state
// DX11 only supports up to 4096 rasterizer state objects.
// We could cache these and only create the distinct objects.
D3D11_RASTERIZER_DESC rasterizer_desc;
rasterizer_desc.FillMode = D3D11_FILL_SOLID;
rasterizer_desc.CullMode = D3D11_CULL_NONE;
rasterizer_desc.FrontCounterClockwise = TRUE;
rasterizer_desc.DepthBias = 0;
rasterizer_desc.DepthBiasClamp = 0.f;
rasterizer_desc.SlopeScaledDepthBias = 0.f;
rasterizer_desc.DepthClipEnable = TRUE;
rasterizer_desc.ScissorEnable = FALSE;
rasterizer_desc.MultisampleEnable = TRUE;
rasterizer_desc.AntialiasedLineEnable = TRUE;
if (FAILED(g_gpu.device->CreateRasterizerState(&rasterizer_desc, &slot->rasterizer_state))) {
rtReportError("dx11", "Rasterizer state creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _pipelines);
return {slot->version, index};
}
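/* Possible shape of the rasterizer-state cache mentioned in the TODO above (a hedged sketch,
 * not part of the engine): D3D11 caps the number of live rasterizer-state objects at 4096,
 * so distinct descriptors could be deduplicated, e.g.
 *
 *   struct rt_rs_cache_entry { uint64_t key; ID3D11RasterizerState *state; };
 *   // key: a byte-wise hash over a fully initialized D3D11_RASTERIZER_DESC (mind padding)
 *   // lookup: probe a small fixed-size table; on a miss, CreateRasterizerState and insert
 *
 * rt_rs_cache_entry and the hashing scheme are assumptions for illustration only. */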
extern "C" void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
return;
auto lg = rtAutoLock(_lock);
if (handle.version != _pipelines[handle.index].version)
return;
if (_pipelines[handle.index].compute_shader)
_pipelines[handle.index].compute_shader->Release();
if (_pipelines[handle.index].vertex_shader)
_pipelines[handle.index].vertex_shader->Release();
if (_pipelines[handle.index].pixel_shader)
_pipelines[handle.index].pixel_shader->Release();
if (_pipelines[handle.index].input_layout)
_pipelines[handle.index].input_layout->Release();
_pipelines[handle.index].next_free = _first_free;
_pipelines[handle.index].version =
(_pipelines[handle.index].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
_first_free = &_pipelines[handle.index];
}
rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _pipelines[handle.index].version)
return nullptr;
return &_pipelines[handle.index];
}

View File

@ -1,229 +0,0 @@
#include "gfx/render_view.h"
#include "gfx/renderer_api.h"
#include "renderer/common/common_render_graph.h"
#include "device_objects.hpp"
#include "gpu.hpp"
static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;
struct rt_pass_runtime_data {
rt_render_view views[RT_DX11_MAX_FRAMES_IN_FLIGHT][MAX_SUBMITTED_VIEWS_PER_PASS];
uint32_t view_count[RT_DX11_MAX_FRAMES_IN_FLIGHT];
unsigned int views_frame_id[RT_DX11_MAX_FRAMES_IN_FLIGHT];
};
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
return rtCreateRenderTarget({.format = rtinfo->format,
.width = rtinfo->width,
.height = rtinfo->height,
.name = rtinfo->name});
}
static int RequireExplicitSynchronization() {
return 0;
}
static size_t GetRuntimeDataSize() {
return sizeof(rt_pass_runtime_data);
}
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs{};
cbs.CreateRenderTarget = CreateRenderTarget;
cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
cbs.GetRuntimeDataSize = GetRuntimeDataSize;
return rtCreateRenderGraphBuilder(&cbs);
}
extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id) {
for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
if (render_graph->passes[i].id == pass_id) {
rt_render_pass *pass = &render_graph->passes[i];
rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
RT_ASSERT(runtime_data->views_frame_id[frame_slot] == frame_id ||
runtime_data->views_frame_id[frame_slot] == 0,
"Tried to submit a view for a not-current frame.");
if (!RT_VERIFY(runtime_data->view_count[frame_slot] < MAX_SUBMITTED_VIEWS_PER_PASS))
return;
runtime_data->views[frame_slot][runtime_data->view_count[frame_slot]++] = view;
runtime_data->views_frame_id[frame_slot] = frame_id;
}
}
}
extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph,
unsigned int frame_id) {
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
for (uint32_t i = 0; i < graph->pass_count; ++i) {
rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
#ifdef RT_DEBUG
memset(runtime_data->views[frame_slot], 0, sizeof(runtime_data->views[frame_slot]));
#endif
runtime_data->view_count[frame_slot] = 0;
runtime_data->views_frame_id[frame_slot] = 0;
}
}
static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle, unsigned int frame_id) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
if (!RT_VERIFY(cmd))
return RT_INVALID_VALUE;
if (cmd->annotation) {
WCHAR wname[128];
if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS)
cmd->annotation->BeginEvent(wname);
}
// Setup rtvs
ID3D11RenderTargetView *rtvs[4];
ID3D11DepthStencilView *dsv = nullptr;
for (uint32_t i = 0; i < pass->color_output_count; ++i) {
rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
if (!RT_VERIFY(rt))
return RT_INVALID_VALUE;
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
rtvs[i] = rt->rtv;
if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
FLOAT color[4] = {
pass->color_clear_values[i].r,
pass->color_clear_values[i].g,
pass->color_clear_values[i].b,
pass->color_clear_values[i].a,
};
cmd->context->ClearRenderTargetView(rt->rtv, color);
}
}
rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
if (dsvrt) {
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
"Need to provide a valid depth stencil render target");
dsv = dsvrt->dsv;
if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
cmd->context->ClearDepthStencilView(
dsv,
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
: D3D11_CLEAR_DEPTH,
pass->depth_stencil_clear_value.depth,
static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
}
cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
D3D11_VIEWPORT viewport;
viewport.TopLeftX = pass->render_area.offset.x;
viewport.TopLeftY = pass->render_area.offset.y;
viewport.Width = pass->render_area.size.x;
viewport.Height = pass->render_area.size.y;
viewport.MinDepth = pass->min_depth;
viewport.MaxDepth = pass->max_depth;
if (viewport.Width == 0 || viewport.Height == 0) {
DXGI_SWAP_CHAIN_DESC desc;
g_gpu.swap_chain.swap_chain->GetDesc(&desc);
if (viewport.Width == 0)
viewport.Width = static_cast<float>(desc.BufferDesc.Width);
if (viewport.Height == 0)
viewport.Height = static_cast<float>(desc.BufferDesc.Height);
}
cmd->context->RSSetViewports(1, &viewport);
auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
RT_VERIFY(runtime_data);
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
rt_result res = RT_VERIFY(pass->Execute)(pass->id,
cmdbuf_handle,
runtime_data->views[frame_slot],
runtime_data->view_count[frame_slot],
pass->user_data);
if (cmd->annotation) {
cmd->annotation->EndEvent();
}
return res;
}
static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
DXGI_SWAP_CHAIN_DESC scd;
g_gpu.swap_chain.swap_chain->GetDesc(&scd);
D3D11_TEXTURE2D_DESC td;
backbuffer->texture->GetDesc(&td);
    // This is stricter than necessary: CopyResource also works when both formats belong to the
    // same typeless format group (see the sketch after this function).
return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
}
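// Sketch (not part of the original code) of how the check above could be relaxed: CopyResource
// only requires both textures to be in the same typeless format group, so UNORM and UNORM_SRGB
// siblings could be treated as copy-compatible. ToTypelessGroup is a hypothetical helper.
static DXGI_FORMAT ToTypelessGroup(DXGI_FORMAT format) {
    switch (format) {
    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
        return DXGI_FORMAT_B8G8R8A8_TYPELESS;
    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
        return DXGI_FORMAT_R8G8B8A8_TYPELESS;
    default:
        return format;
    }
}
// Comparing ToTypelessGroup(scd.BufferDesc.Format) == ToTypelessGroup(td.Format) would then allow
// the CopyResource path for matching sRGB / non-sRGB swapchain formats as well.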
extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph, unsigned int frame_id) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
// Alloc a command buffer for every pass
rt_command_buffer_handle *cmdbufs =
RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
rt_render_pass *pass = &render_graph->passes[i];
res = ExecutePass(pass, cmdbufs[i], frame_id);
if (res != RT_SUCCESS)
break;
}
if (res == RT_SUCCESS) {
res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
}
// Copy backbuffer to swapchain
rt_render_target *backbuffer =
rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
if (!backbuffer) {
rtReturnTemporaryArena(temp);
return RT_INVALID_VALUE;
}
ID3D11Texture2D *frame_buffer;
if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
rtReportError("dx11", "Failed to retrieve the backbuffer.");
rtReturnTemporaryArena(temp);
return RT_UNKNOWN_ERROR;
}
if (IsCopyResourcePossible(backbuffer)) {
g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
} else {
// NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
// that implements a blit.
// Another idea would be a compute shader that does a copy&filter but that requires more
// work
RT_NOT_IMPLEMENTED;
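        // A hypothetical sketch of the fullscreen-triangle blit mentioned above; the shader
        // objects and SRV named here (fullscreen_tri_vs, blit_ps, backbuffer_srv) do not exist
        // in this codebase:
        //   g_gpu.device_context->OMSetRenderTargets(1, &swapchain_rtv, nullptr);
        //   g_gpu.device_context->VSSetShader(fullscreen_tri_vs, nullptr, 0); // tri from SV_VertexID
        //   g_gpu.device_context->PSSetShader(blit_ps, nullptr, 0);           // samples the source
        //   g_gpu.device_context->PSSetShaderResources(0, 1, &backbuffer_srv);
        //   g_gpu.device_context->Draw(3, 0);                                 // no vertex buffer needed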
}
rtReturnTemporaryArena(temp);
return res;
}

View File

@@ -1,182 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxRenderTargets,
"Maximum number of simultaneously existing render targets. Default: 128",
128);
static rt_render_target *_render_targets;
static rt_render_target *_first_free;
static rt_mutex *_lock;
rt_result InitRenderTargetManagement() {
_render_targets = reinterpret_cast<rt_render_target *>(
calloc((size_t)rt_Dx11MaxRenderTargets.i, sizeof(rt_render_target)));
if (!_render_targets) {
return RT_OUT_OF_MEMORY;
}
_lock = rtCreateMutex();
if (!_lock) {
free(_render_targets);
return RT_UNKNOWN_ERROR;
}
_render_targets[1].rtv = g_gpu.swap_chain.rtv.Get();
_render_targets[1].format = RT_PIXEL_FORMAT_B8G8R8A8_SRGB;
_render_targets[1].version = 1;
_first_free = _render_targets + 2;
    for (int i = 2; i < rt_Dx11MaxRenderTargets.i - 1; ++i) {
        _render_targets[i].next_free = &_render_targets[i + 1];
    }
    _render_targets[rt_Dx11MaxRenderTargets.i - 1].next_free = nullptr;
return RT_SUCCESS;
}
void ShutdownRenderTargetManagement() {
// Swapchain rtv in slot 1 will be released elsewhere
for (int i = 2; i < rt_Dx11MaxRenderTargets.i; ++i) {
if (_render_targets[i].rtv)
_render_targets[i].rtv->Release();
if (_render_targets[i].dsv)
_render_targets[i].dsv->Release();
if (_render_targets[i].texture)
_render_targets[i].texture->Release();
}
free(_render_targets);
rtDestroyMutex(_lock);
}
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info) {
rt_render_target *slot = nullptr;
    {
        auto lock_guard = rtAutoLock(_lock);
        slot = _first_free;
        if (slot)
            _first_free = slot->next_free;
    }
if (!slot) {
rtLog("dx11",
"Could not create a new render target, because all available slots are currently in "
"use.");
return RT_INVALID_HANDLE;
}
slot->format = info.format;
uint32_t swapchain_width = 0, swapchain_height = 0;
if (info.width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
info.height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
DXGI_SWAP_CHAIN_DESC desc;
g_gpu.swap_chain.swap_chain->GetDesc(&desc);
swapchain_width = desc.BufferDesc.Width;
swapchain_height = desc.BufferDesc.Height;
}
if (!rtIsDepthFormat(info.format)) {
D3D11_TEXTURE2D_DESC tex_desc = {};
tex_desc.Width =
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
tex_desc.Height =
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
tex_desc.MipLevels = 1;
tex_desc.ArraySize = 1;
tex_desc.Format = rtConvertPixelFormat(info.format);
tex_desc.SampleDesc.Count = 1;
tex_desc.SampleDesc.Quality = 0;
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
tex_desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
tex_desc.CPUAccessFlags = 0; // none
tex_desc.MiscFlags = 0;
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
rtv_desc.Format = rtConvertPixelFormat(info.format);
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
rtv_desc.Texture2D.MipSlice = 0;
if (FAILED(g_gpu.device->CreateRenderTargetView(slot->texture, &rtv_desc, &slot->rtv))) {
slot->texture->Release();
rtLog("dx11",
"Failed to create the render target view for render target %s",
info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _render_targets);
return {.version = slot->version, .index = index};
} else {
D3D11_TEXTURE2D_DESC tex_desc = {};
tex_desc.Width =
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
tex_desc.Height =
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
tex_desc.MipLevels = 1;
tex_desc.ArraySize = 1;
tex_desc.Format = rtConvertPixelFormat(info.format);
tex_desc.SampleDesc.Count = 1;
tex_desc.SampleDesc.Quality = 0;
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
tex_desc.CPUAccessFlags = 0; // none
tex_desc.MiscFlags = 0;
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
dsv_desc.Format = rtConvertPixelFormat(info.format);
dsv_desc.Flags = 0;
dsv_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
dsv_desc.Texture2D.MipSlice = 0;
if (FAILED(g_gpu.device->CreateDepthStencilView(slot->texture, &dsv_desc, &slot->dsv))) {
slot->texture->Release();
rtLog("dx11",
"Failed to create the depth stencil view for render target %s",
info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _render_targets);
return {.version = slot->version, .index = index};
}
}
void rtDestroyRenderTarget(rt_render_target_handle handle) {
RT_UNUSED(handle);
}
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxRenderTargets.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (_render_targets[handle.index].version != handle.version)
return nullptr;
return &_render_targets[handle.index];
}

View File

@@ -1,10 +0,0 @@
null_renderer_lib = library('rtnull',
'null.c',
'../common/common_render_graph.c',
include_directories : engine_incdir,
link_with : runtime_lib,
install : true)
engine_libs += null_renderer_lib
engine_lib_paths += null_renderer_lib.full_path()

View File

@@ -1,159 +0,0 @@
/* "Null" renderer implementation.
* Useful for headless testing */
#include "gfx/renderer_api.h"
#include "runtime/runtime.h"
#include "../common/common_render_graph.h"
#define RETURN_HANDLE_STUB2(type, initial) \
static unsigned int s_next = (initial); \
return (type) { .index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX, .version = 1 }
#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)
#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial) \
static unsigned int s_next = (initial); \
for (uint32_t i = 0; i < (count); ++i) { \
(out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
(out)[i].version = 1; \
}
#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)
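/* Illustration (not part of the original code): a function that uses RETURN_HANDLE_STUB hands out
 * {index = 1, version = 1} on its first call, {index = 2, version = 1} on the second, and so on,
 * with the index wrapping modulo RT_RENDER_BACKEND_HANDLE_MAX_INDEX. */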
void RT_RENDERER_API_FN(RegisterCVars)(void) {
}
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
RT_UNUSED(info);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(Shutdown)(void) {
}
unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return 2;
}
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
}
void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
}
rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
RT_UNUSED(info);
RETURN_HANDLE_STUB(rt_pipeline_handle);
}
void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
RT_UNUSED(handle);
}
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB(p_command_buffers, count)
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
RT_UNUSED(queue);
RT_UNUSED(info);
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB(p_buffers, count);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
RT_UNUSED(count);
RT_UNUSED(buffers);
}
void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmd,
const rt_cmd_begin_pass_info *info) {
RT_UNUSED(cmd);
RT_UNUSED(info);
}
void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmd) {
RT_UNUSED(cmd);
}
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
rt_render_target_handle target,
rt_render_target_state state) {
RT_UNUSED(cmd);
RT_UNUSED(target);
RT_UNUSED(state);
}
void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target) {
RT_UNUSED(cmdbuf_handle);
RT_UNUSED(render_target);
}
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *info) {
    RT_UNUSED(info);
    RETURN_HANDLE_STUB(rt_render_target_handle);
}
static int RequireExplicitSync(void) {
return 0;
}
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = CreateRenderTarget,
.RequireExplicitSynchronization =
RequireExplicitSync};
return rtCreateRenderGraphBuilder(&cbs);
}
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
RT_UNUSED(render_graph);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id) {
RT_UNUSED(render_graph);
RT_UNUSED(pass_id);
RT_UNUSED(view);
RT_UNUSED(frame_id);
}
void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
RT_UNUSED(graph);
}
void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
                                         rt_pipeline_handle pipeline_handle) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(pipeline_handle);
}
void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
                                              uint32_t first_binding,
                                              uint32_t count,
                                              const rt_buffer_handle *buffers,
                                              const uint64_t *_offsets) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first_binding);
    RT_UNUSED(count);
    RT_UNUSED(buffers);
    RT_UNUSED(_offsets);
}
void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle,
                                 uint32_t first,
                                 uint32_t count) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first);
    RT_UNUSED(count);
}

View File

@@ -1,219 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "transfers.h"
#include "resources.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/threading.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);
typedef struct rt_buffer_data_s {
rt_buffer data;
uint32_t version;
struct rt_buffer_data_s *next_free;
} rt_buffer_data;
static rt_buffer_data *_buffers;
static rt_buffer_data *_first_free;
static rt_mutex *_list_lock;
rt_result InitBufferManagement(void) {
size_t n = (size_t)rt_VkMaxBufferCount.i;
_buffers = calloc(n, sizeof(rt_buffer_data));
if (!_buffers)
return RT_OUT_OF_MEMORY;
_first_free = &_buffers[1];
for (size_t i = 1; i < n - 1; ++i)
_buffers[i].next_free = &_buffers[i + 1];
_list_lock = rtCreateMutex();
return RT_SUCCESS;
}
void ShutdownBufferManagement(void) {
for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
if (_buffers[i].data.buffer == VK_NULL_HANDLE)
continue;
vmaDestroyBuffer(g_gpu.allocator, _buffers[i].data.buffer, _buffers[i].data.allocation);
rtDestroyRWLock(&_buffers[i].data.lock);
memset(&_buffers[i], 0, sizeof(_buffers[i]));
}
free(_buffers);
_first_free = NULL;
rtDestroyMutex(_list_lock);
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);
static void UploadViaMap(rt_buffer_data *buffer, const void *data, size_t size) {
rtLockWrite(&buffer->data.lock);
void *dev_mem = NULL;
if (vmaMapMemory(g_gpu.allocator, buffer->data.allocation, &dev_mem) != VK_SUCCESS) {
rtReportError("vk", "Unable to map buffer for upload");
rtUnlockWrite(&buffer->data.lock);
return;
}
memcpy(dev_mem, data, size);
vmaUnmapMemory(g_gpu.allocator, buffer->data.allocation);
if (!buffer->data.coherent)
vmaFlushAllocation(g_gpu.allocator, buffer->data.allocation, 0, VK_WHOLE_SIZE);
rtUnlockWrite(&buffer->data.lock);
}
/* Convenience function that decides between mapping or uploading via transfer buffer */
static void UploadData(rt_buffer_data *buffer, const void *data, size_t size) {
if (buffer->data.mappable)
UploadViaMap(buffer, data, size);
else
rtUploadToBuffer(buffer->data.buffer,
buffer->data.allocation,
buffer->data.owner,
data,
size);
}
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_list_lock);
rt_buffer_data *slot = _first_free;
if (!slot) {
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_OUT_OF_MEMORY;
}
_first_free = slot->next_free;
rtUnlockMutex(_list_lock);
VkBufferUsageFlags buffer_usage = 0;
switch (info->type) {
case RT_BUFFER_TYPE_VERTEX:
buffer_usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_INDEX:
buffer_usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_STORAGE:
buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_UNIFORM:
buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
break;
}
buffer_usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
VkBufferCreateInfo buffer_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = info->size,
.usage = buffer_usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VmaMemoryUsage alloc_usage = 0;
VmaAllocationCreateFlags alloc_flags = 0;
switch (info->usage) {
case RT_BUFFER_USAGE_STATIC:
alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
alloc_flags = 0;
break;
case RT_BUFFER_USAGE_DYNAMIC:
alloc_usage = VMA_MEMORY_USAGE_AUTO;
alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
break;
case RT_BUFFER_USAGE_TRANSIENT:
alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
break;
}
VmaAllocationCreateInfo alloc_info = {.usage = alloc_usage, .flags = alloc_flags};
VkResult res = vmaCreateBuffer(g_gpu.allocator,
&buffer_info,
&alloc_info,
&slot->data.buffer,
&slot->data.allocation,
NULL);
if (res != VK_SUCCESS) {
rtReportError("vk", "Failed to create a buffer: %u", res);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_UNKNOWN_ERROR;
}
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok) {
rtReportError("vk", "Failed to create lock for buffer.");
vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_UNKNOWN_ERROR;
}
VkMemoryPropertyFlags properties;
vmaGetAllocationMemoryProperties(g_gpu.allocator, slot->data.allocation, &properties);
slot->data.mappable = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
slot->data.coherent = (properties & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
slot->data.owner = RT_VK_UNOWNED;
slot->data.state = RT_BUFFER_STATE_NOT_USED;
if (info->data)
UploadData(slot, info->data, info->size);
ptrdiff_t index = slot - _buffers;
p_buffers[i].index = (uint32_t)index;
p_buffers[i].version = slot->version;
}
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
for (uint32_t i = 0; i < count; ++i) {
if (buffers[i].index >= (uint32_t)rt_VkMaxBufferCount.i)
continue;
rt_buffer_data *slot = &_buffers[buffers[i].index];
if (slot->version != buffers[i].version) {
rtLog("vk", "Tried to destroy a buffer with an invalid handle (version mismatch).");
continue;
}
rtLockWrite(&slot->data.lock);
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
slot->data.buffer = VK_NULL_HANDLE;
slot->data.allocation = VK_NULL_HANDLE;
rtUnlockWrite(&slot->data.lock);
rtDestroyRWLock(&slot->data.lock);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
}
}
rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
if (handle.index >= (uint32_t)rt_VkMaxBufferCount.i)
return NULL;
rt_buffer_data *slot = &_buffers[handle.index];
if (slot->version != handle.version) {
rtLog("vk", "Tried to access a buffer with an invalid handle (version mismatch).");
return NULL;
}
return &slot->data;
}

View File

@@ -1,490 +0,0 @@
#include "gpu.h"
#include "gpu_sync.h"
#include "swapchain.h"
#include "runtime/atomics.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "gfx/renderer_api.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxCommandPools,
"Maximum number of command pools that can be created. Default: 32",
32);
RT_CVAR_I(
rt_VkCommandBufferRingBufferSize,
"Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
512);
typedef struct {
VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
uint32_t distinct_pool_count;
VkCommandPool *compute_pools;
VkCommandPool *graphics_pools;
VkCommandPool *transfer_pools;
} rt_thread_pools;
typedef struct {
VkCommandBuffer command_buffer;
uint32_t version;
rt_gpu_queue target_queue;
} rt_command_buffer;
static rt_thread_pools *_pools;
static uint32_t _next_pools;
static RT_THREAD_LOCAL unsigned int t_first_pool;
static rt_command_buffer *_command_buffers;
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
* index. */
static uint32_t _next_command_buffer;
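/* Worked example (illustrative, not from the original code, assuming the default ring size of
 * 512): even when the fetch-add wraps past UINT32_MAX, (start + i) % 512 stays consistent,
 * because a power-of-two ring size divides 2^32 evenly. This is why the cvar demands a power of
 * two. */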
rt_result InitCommandBufferManagement(void) {
_pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
if (!_pools)
return RT_OUT_OF_MEMORY;
_command_buffers =
calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
if (!_command_buffers) {
free(_pools);
return RT_OUT_OF_MEMORY;
}
/* We keep 0 free as a "Not initialized" value for t_first_pool.
* The atomicinc used to acquire a pool returns the incremented value, so 0 is never returned.
*/
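    /* Example (illustrative, not from the original code): the first thread that allocates a
     * command buffer gets t_first_pool == 1 and works with _pools[1]; _pools[0] is never handed
     * out. */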
_next_pools = 0;
return RT_SUCCESS;
}
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
}
void ShutdownCommandBufferManagement(void) {
    /* Pools with indices 1.._next_pools are in use; index 0 is reserved as "not initialized". */
    for (uint32_t i = 1; i <= _next_pools; ++i) {
        DestroyPools(&_pools[i]);
    }
    free(_pools);
    free(_command_buffers);
}
void rtResetCommandPools(unsigned int frame_id) {
unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
    for (uint32_t i = 1; i <= _next_pools; ++i) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].graphics_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
}
if (_pools[i].compute_pools != _pools[i].graphics_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].compute_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
}
}
if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
_pools[i].transfer_pools != _pools[i].compute_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].transfer_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
}
}
}
}
static rt_result CreatePools(rt_thread_pools *pools) {
/* Graphics pools */
pools->graphics_pools = pools->pools;
pools->distinct_pool_count = 0;
VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.graphics_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&graphics_info,
g_gpu.alloc_cb,
&pools->graphics_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a graphics command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
if (g_gpu.compute_family != g_gpu.graphics_family) {
VkCommandPoolCreateInfo compute_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.compute_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->compute_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&compute_info,
g_gpu.alloc_cb,
&pools->compute_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a compute command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else {
pools->compute_pools = pools->graphics_pools;
}
if (g_gpu.transfer_family != g_gpu.graphics_family &&
g_gpu.transfer_family != g_gpu.compute_family) {
VkCommandPoolCreateInfo transfer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.transfer_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&transfer_info,
g_gpu.alloc_cb,
&pools->transfer_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a transfer command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else if (g_gpu.transfer_family == g_gpu.graphics_family) {
pools->transfer_pools = pools->graphics_pools;
} else if (g_gpu.transfer_family == g_gpu.compute_family) {
pools->transfer_pools = pools->compute_pools;
}
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
        if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
            return RT_OUT_OF_MEMORY;
        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return create_res;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return RT_OUT_OF_MEMORY;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
rt_result result = RT_SUCCESS;
/* TODO: We should probably batch allocations of the same type */
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
for (uint32_t i = 0; i < count; ++i) {
uint32_t slot = (start + i) % mod;
_command_buffers[slot].version =
(_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
if (_command_buffers[slot].version == 0)
_command_buffers[slot].version = 1;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (info[i].target_queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (info[i].target_queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
_command_buffers[slot].target_queue = info[i].target_queue;
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
if (vkAllocateCommandBuffers(g_gpu.device,
&alloc_info,
&_command_buffers[slot].command_buffer) != VK_SUCCESS) {
result = RT_UNKNOWN_ERROR;
break;
}
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(_command_buffers[slot].command_buffer, &begin_info);
p_command_buffers[i].index = (slot + 1);
p_command_buffers[i].version = _command_buffers[slot].version;
}
return result;
}
#define RT_VK_LOG_SUBMIT_INFO 1
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
uint32_t count = info->command_buffer_count;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
rt_result result = RT_SUCCESS;
VkQueue target_queue = rtGetQueue(queue);
VkCommandBufferSubmitInfo *command_buffers =
RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count);
if (!command_buffers) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *wait_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->wait_semaphore_count);
if (!wait_semaphores && info->wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count);
if (!signal_semaphores && info->signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = info->wait_semaphore_count;
uint32_t signal_count = info->signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->wait_semaphores[i]),
.value = info->wait_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->signal_semaphores[i]),
.value = info->signal_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < count; ++i) {
if (!RT_IS_HANDLE_VALID(info->command_buffers[i])) {
rtLog("vk", "Tried to submit an invalid command buffer.");
result = RT_INVALID_VALUE;
goto out;
}
uint32_t slot = info->command_buffers[i].index - 1;
if (_command_buffers[slot].version != info->command_buffers[i].version) {
rtLog("vk",
"Mismatch between handle version and stored version while submitting a command "
"buffer");
result = RT_INVALID_VALUE;
goto out;
}
if (_command_buffers[slot].target_queue != queue) {
rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
result = RT_INVALID_VALUE;
goto out;
}
command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
command_buffers[i].pNext = NULL;
command_buffers[i].deviceMask = 0;
command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer;
vkEndCommandBuffer(command_buffers[i].commandBuffer);
}
#if RT_VK_LOG_SUBMIT_INFO
{
const char *queue_str = "<invalid>";
if (queue == RT_GRAPHICS_QUEUE)
queue_str = "GRAPHICS";
else if (queue == RT_COMPUTE_QUEUE)
queue_str = "COMPUTE";
else if (queue == RT_TRANSFER_QUEUE)
queue_str = "TRANSFER";
rtLog("vk", "Submit Info");
rtLog("vk", "Queue: %s", queue_str);
rtLog("vk", "Command Buffers: %u", count);
rtLog("vk", " - TODO: More Info");
rtLog("vk", "Wait Semaphores:");
for (uint32_t i = 0; i < wait_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->wait_semaphores[i].version,
info->wait_semaphores[i].index,
info->wait_values[i]);
}
rtLog("vk", "Signal Semaphores:");
for (uint32_t i = 0; i < signal_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->signal_semaphores[i].version,
info->signal_semaphores[i].index,
info->signal_values[i]);
}
}
#endif
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphores,
.pSignalSemaphoreInfos = signal_semaphores,
.commandBufferInfoCount = count,
.pCommandBufferInfos = command_buffers,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}
VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf) {
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
if (!RT_IS_HANDLE_VALID(cmdbuf))
return VK_NULL_HANDLE;
uint32_t slot = (cmdbuf.index - 1) % mod;
if (_command_buffers[slot].version != cmdbuf.version) {
return VK_NULL_HANDLE;
}
return _command_buffers[slot].command_buffer;
}
VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
        if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
            return VK_NULL_HANDLE;
        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return VK_NULL_HANDLE;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return VK_NULL_HANDLE;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
VkCommandBuffer cmdbuf;
if (vkAllocateCommandBuffers(g_gpu.device, &alloc_info, &cmdbuf) != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
return cmdbuf;
}
rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
const VkSemaphore *wait_semaphores,
const uint32_t *wait_values,
uint32_t wait_semaphore_count,
const VkSemaphore *signal_semaphores,
const uint32_t *signal_values,
uint32_t signal_semaphore_count,
rt_gpu_queue queue,
VkFence fence) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
VkQueue target_queue = rtGetQueue(queue);
rt_result result = RT_SUCCESS;
VkSemaphoreSubmitInfo *wait_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, wait_semaphore_count);
if (!wait_semaphore_info && wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, signal_semaphore_count);
if (!signal_semaphore_info && signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = wait_semaphore_count;
uint32_t signal_count = signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = wait_semaphores[i],
.value = (wait_values) ? wait_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphore_info[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = signal_semaphores[i],
.value = (signal_values) ? signal_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphore_info[i] = semaphore_info;
}
VkCommandBufferSubmitInfo command_buffer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.deviceMask = 0,
.commandBuffer = command_buffer,
};
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphore_info,
.pSignalSemaphoreInfos = signal_semaphore_info,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &command_buffer_info,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}

View File

@@ -1,25 +0,0 @@
#ifndef RT_COMMAND_BUFFERS_H
#define RT_COMMAND_BUFFERS_H
#include "gfx/renderer_api.h"
#include "runtime/runtime.h"
#include <volk/volk.h>
void rtResetCommandPools(unsigned int frame_id);
VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf);
VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue);
rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
const VkSemaphore *wait_semaphores,
const uint32_t *wait_values,
uint32_t wait_semaphore_count,
const VkSemaphore *signal_semaphores,
const uint32_t *signal_values,
uint32_t signal_semaphore_count,
rt_gpu_queue queue,
VkFence fence);
#endif

View File

@@ -1,510 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "gfx/renderer_api.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include <string.h>
#define USE_SIMPLE_SYNC_LIB 0
#if USE_SIMPLE_SYNC_LIB
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
#include <stdbool.h>
#endif
/* Retrieve the VkCommandBuffer as varname, or return */
#define GET_CMDBUF(varname, handle) \
VkCommandBuffer varname = rtGetCommandBuffer((handle)); \
if (varname == VK_NULL_HANDLE) { \
rtLog("vk", "Failed to retrive VkCommandBuffer for %s", __FUNCTION__); \
return; \
}
void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdbuf_handle,
const rt_cmd_begin_pass_info *info) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena) {
rtReportError("vk", "Failed to acquire a temporary arena for CmdBeginPass");
return;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.color = {0.39f, 0.58f, 0.92f, 1.f},
.pLabelName = (info->name) ? info->name : "Unnamed pass",
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
/* Acquire the necessary attachments */
VkRenderingAttachmentInfo *colorbuffers =
RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, VkRenderingAttachmentInfo, info->color_buffer_count);
for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
VkImageView image_view = VK_NULL_HANDLE;
if (RT_IS_HANDLE_VALID(info->color_buffers[i])) {
rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
if (rt)
image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];
}
colorbuffers[i].sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
colorbuffers[i].pNext = NULL;
colorbuffers[i].imageView = image_view;
colorbuffers[i].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
switch (info->color_buffer_loads[i]) {
case RT_PASS_LOAD_MODE_CLEAR:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case RT_PASS_LOAD_MODE_LOAD:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
default:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (info->color_buffer_writes[i]) {
case RT_PASS_WRITE_MODE_STORE:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
break;
case RT_PASS_WRITE_MODE_DISCARD:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
break;
default:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_NONE;
break;
}
memcpy(&colorbuffers[i].clearValue.color.float32,
info->color_buffer_clear_values[i].color.v,
sizeof(float) * 4);
/* TODO: Multisample resolve */
colorbuffers[i].resolveMode = VK_RESOLVE_MODE_NONE;
colorbuffers[i].resolveImageView = VK_NULL_HANDLE;
colorbuffers[i].resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
/* depth and stencil might be the same */
VkRenderingAttachmentInfo *depth_stencil_buffer =
RT_IS_HANDLE_VALID(info->depth_stencil_buffer)
? RT_ARENA_PUSH_STRUCT_ZERO(temp.arena, VkRenderingAttachmentInfo)
: NULL;
if (depth_stencil_buffer) {
VkImageView image_view = VK_NULL_HANDLE;
rt_render_target *rt = rtGetRenderTarget(info->depth_stencil_buffer);
if (rt)
image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];
depth_stencil_buffer->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
depth_stencil_buffer->pNext = NULL;
depth_stencil_buffer->imageView = image_view;
depth_stencil_buffer->imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
switch (info->depth_stencil_buffer_load) {
case RT_PASS_LOAD_MODE_CLEAR:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case RT_PASS_LOAD_MODE_LOAD:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
default:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (info->depth_stencil_buffer_write) {
case RT_PASS_WRITE_MODE_STORE:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
break;
case RT_PASS_WRITE_MODE_DISCARD:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
break;
default:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_NONE;
break;
}
/* TODO: Multisample resolve */
depth_stencil_buffer->resolveMode = VK_RESOLVE_MODE_NONE;
depth_stencil_buffer->resolveImageView = VK_NULL_HANDLE;
depth_stencil_buffer->resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
VkRect2D render_area = {
.offset = { .x = info->render_area.offset.x, .y = info->render_area.offset.y},
.extent = {.width = info->render_area.size.x, .height = info->render_area.size.y}
};
if (render_area.extent.width == 0)
render_area.extent.width = g_swapchain.extent.width;
if (render_area.extent.height == 0)
render_area.extent.height = g_swapchain.extent.height;
VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
.pColorAttachments = colorbuffers,
.colorAttachmentCount = info->color_buffer_count,
.pDepthAttachment = depth_stencil_buffer,
.pStencilAttachment = depth_stencil_buffer,
.layerCount = 1,
.renderArea = render_area,
};
vkCmdBeginRendering(cmdbuf, &rendering_info);
rtReturnTemporaryArena(temp);
}
void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdbuf_handle) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
vkCmdEndRendering(cmdbuf);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
/* Non-layout transition barrier */
static void ExecuteRenderTargetBarrier(rt_render_target *rt,
uint32_t image_index,
                                       VkCommandBuffer cmdbuf) {
    /* Determine the current layout; this barrier keeps the layout unchanged. */
VkImageLayout layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
layout = VK_IMAGE_LAYOUT_UNDEFINED;
break;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = "Render Target Barrier",
.color = {.13f, .54f, .13f, .75f},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
/* Determine access flags */
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
dst_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
dst_access = VK_ACCESS_2_SHADER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
}
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = layout,
.newLayout = layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmdbuf, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
static void DoLayoutTransition(rt_render_target *rt,
uint32_t image_index,
rt_render_target_state new_state,
VkCommandBuffer cmdbuf) {
#if !USE_SIMPLE_SYNC_LIB
/* Determine old and new layout */
VkImageLayout old_layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
old_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
break;
}
VkImageLayout new_layout;
switch (new_state) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
new_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = "Transition Render Target",
.color = {.13f, .54f, .13f, .75f},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
/* Determine access flags */
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
src_stage =
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; // VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
// VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
}
if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
dst_stage = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
        dst_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
        dst_stage =
            VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
    }
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmdbuf, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
#else
    ThsvsAccessType prev_access = THSVS_ACCESS_NONE;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
else
prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) {
prev_access = THSVS_ACCESS_NONE;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
prev_access = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
prev_access = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
}
ThsvsAccessType next_accesses[2];
uint32_t next_access_count = 0;
if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
next_accesses[0] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ;
next_accesses[1] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
} else {
next_accesses[0] = THSVS_ACCESS_COLOR_ATTACHMENT_READ;
next_accesses[1] = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
}
next_access_count = 2;
    } else if (new_state == RT_RENDER_TARGET_STATE_INVALID) {
        next_accesses[0] = THSVS_ACCESS_NONE;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
        next_accesses[1] = THSVS_ACCESS_ANY_SHADER_WRITE;
        next_access_count = 2;
    }
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
ThsvsImageBarrier barrier = {0};
barrier.image = rt->image[image_index];
barrier.pPrevAccesses = &prev_access;
barrier.prevAccessCount = 1;
barrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
barrier.nextAccessCount = next_access_count;
barrier.pNextAccesses = next_accesses;
barrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
barrier.discardContents = false;
barrier.subresourceRange.aspectMask = aspect_mask;
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.layerCount = 1;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = 1;
thsvsCmdPipelineBarrier(cmdbuf, NULL, 0, NULL, 1, &barrier);
#endif
rt->states[image_index] = new_state;
}
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target,
rt_render_target_state new_state) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
if (render_target.index == rtGetSwapchainRenderTarget().index) {
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
}
rt_render_target *rt = rtGetRenderTarget(render_target);
if (!rt) {
rtLog("vk", "Tried to transition invalid render target");
return;
}
if (rt->states[image_index] != new_state)
DoLayoutTransition(rt, image_index, new_state, cmdbuf);
else
ExecuteRenderTargetBarrier(rt, image_index, cmdbuf);
}
void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
if (render_target.index == rtGetSwapchainRenderTarget().index) {
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
}
rt_render_target *rt = rtGetRenderTarget(render_target);
if (!rt) {
rtLog("vk", "Tried to flush invalid render target");
return;
}
VkAccessFlags2 src_access;
VkPipelineStageFlags2 src_stage;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        /* Only storage images can have pending shader writes that need to be flushed. */
        src_access = VK_ACCESS_2_MEMORY_WRITE_BIT;
        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    } else {
        return;
    }
VkMemoryBarrier2 barrier = {.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
.srcAccessMask = src_access,
.srcStageMask = src_stage,
.dstAccessMask = 0,
.dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT};
VkDependencyInfo dep = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.memoryBarrierCount = 1,
.pMemoryBarriers = &barrier,
};
vkCmdPipelineBarrier2(cmdbuf, &dep);
}

View File

@@ -1,139 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "transfers.h"
#include "gfx/renderer_api.h"
#define ONE_SECOND_NS 1000000000u
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
g_gpu.current_frame_id = frame_id;
rt_frame_data *frame = rtGetFrameData(frame_id);
/* Wait until the previous frame is done */
VkFence fence = g_swapchain.image_fences[frame_id % g_swapchain.image_count];
RT_VK_CHECK(vkWaitForFences(g_gpu.device, 1, &fence, VK_TRUE, ONE_SECOND_NS));
RT_VK_CHECK(vkResetFences(g_gpu.device, 1, &fence));
rtResetCommandPools(frame_id);
VkResult acquire_res = vkAcquireNextImageKHR(g_gpu.device,
g_swapchain.swapchain,
ONE_SECOND_NS,
frame->image_available,
fence,
&frame->swapchain_image_index);
if (acquire_res == VK_SUBOPTIMAL_KHR || acquire_res == VK_ERROR_OUT_OF_DATE_KHR) {
/* We need to recreate the swapchain and try again */
rtLog("vk", "Swapchain has become suboptimal and needs to be re-created.");
vkDeviceWaitIdle(g_gpu.device);
if (rtRecreateSwapchain() != RT_SUCCESS) {
rtReportError("vk", "Failed to recreate the swapchain.");
return;
}
rtUpdateSwapchainRenderTarget();
rtUpdateRenderTargetsFromSwapchain(g_swapchain.image_count,
g_swapchain.format,
g_swapchain.extent);
rtRenBeginFrame(frame_id);
} else if (acquire_res != VK_SUCCESS) {
rtReportError("vk", "vkAcquireNextImageKHR failed: %u", acquire_res);
}
/* Update the swapchain render target */
rt_render_target_handle swap_rt_handle = rtGetSwapchainRenderTarget();
rt_render_target *swap_rt = rtGetRenderTarget(swap_rt_handle);
swap_rt->states[frame->swapchain_image_index] = RT_RENDER_TARGET_STATE_INVALID;
}
void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
rt_frame_data *frame = rtGetFrameData(frame_id);
uint32_t image_index = frame->swapchain_image_index;
/* Transition the swap chain image to the correct layout */
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_GRAPHICS_QUEUE);
if (cmd == VK_NULL_HANDLE) {
rtReportError("vk",
"Failed to allocate a command buffer for transitioning the swapchain image "
"to PRESENT_SRC layout.");
return;
}
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.color = {.13f, .54f, .13f, 1.f},
.pLabelName = "Transition Swapchain"
};
vkCmdBeginDebugUtilsLabelEXT(cmd, &debug_label);
#endif
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT | VK_ACCESS_2_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.image = g_swapchain.images[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmd, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmd);
#endif
vkEndCommandBuffer(cmd);
if (rtSubmitSingleCommandBuffer(cmd,
&frame->render_finished,
NULL,
1,
&frame->swapchain_transitioned,
NULL,
1,
RT_GRAPHICS_QUEUE,
VK_NULL_HANDLE) != RT_SUCCESS) {
rtReportError("vk", "Failed to submit the layout transition for the swapchain image.");
return;
}
VkPresentInfoKHR present_info = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pImageIndices = &image_index,
.pSwapchains = &g_swapchain.swapchain,
.swapchainCount = 1,
.pWaitSemaphores = &frame->swapchain_transitioned,
.waitSemaphoreCount = 1,
};
VkResult res = vkQueuePresentKHR(g_gpu.present_queue, &present_info);
if (res != VK_SUCCESS) {
rtReportError("vk", "vkQueuePresentKHR failed: %u", res);
}
rtFlushGPUTransfers();
}

View File

@@ -1,22 +0,0 @@
#ifndef RT_VK_FRAMEBUFFER_H
#define RT_VK_FRAMEBUFFER_H
#include <volk/volk.h>
typedef struct {
VkFramebuffer framebuffer;
uint32_t pass_idx;
} rt_framebuffer;
typedef struct {
uint32_t index;
} rt_framebuffer_handle;
/* Reserve a slot, but don't actually create the framebuffer yet.
 * Useful when it is not yet known whether the framebuffer will actually be needed.
*/
rt_framebuffer_handle rt_reserve_framebuffer(void);
rt_framebuffer *rt_get_framebuffer(rt_framebuffer_handle handle);
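/* Hypothetical usage sketch (not part of the original header): reserve a slot while building the
 * render graph and create the VkFramebuffer lazily on first use:
 *
 *   rt_framebuffer_handle handle = rt_reserve_framebuffer();
 *   ...
 *   rt_framebuffer *fb = rt_get_framebuffer(handle);
 *   if (fb && fb->framebuffer == VK_NULL_HANDLE) {
 *       // create the framebuffer for the pass identified by fb->pass_idx now
 *   }
 */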
#endif

View File

@ -1,107 +0,0 @@
#ifndef RT_VK_GPU_H
#define RT_VK_GPU_H
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vma/vk_mem_alloc.h>
#include "gfx/renderer_api.h"
/* Used to mark a resource as not owned by a particular queue */
#define RT_VK_UNOWNED 255
/* Minimum supported value of g_gpu.max_frames_in_flight */
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2
/* Maximum supported number of frames in flight.
* The actually configured value is contained in g_gpu. */
#define RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT 3
#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif
typedef struct {
#ifdef _WIN32
struct HINSTANCE__ *hInstance;
struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
struct _XDisplay *display;
unsigned long window;
#endif
} rt_native_window;
typedef struct {
uint32_t swapchain_image_index;
VkSemaphore image_available;
VkSemaphore render_finished;
VkSemaphore swapchain_transitioned;
} rt_frame_data;
typedef struct {
VkInstance instance;
VkDebugUtilsMessengerEXT messenger;
VkAllocationCallbacks *alloc_cb;
VkPhysicalDevice phys_device;
VkDevice device;
VkSurfaceKHR surface;
VkQueue graphics_queue;
VkQueue compute_queue;
VkQueue present_queue;
VkQueue transfer_queue;
uint32_t graphics_family;
uint32_t compute_family;
uint32_t present_family;
uint32_t transfer_family;
rt_native_window native_window;
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props;
VkPhysicalDeviceProperties phys_device_props;
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features;
VkPhysicalDeviceFeatures phys_device_features;
VmaAllocator allocator;
unsigned int max_frames_in_flight;
unsigned int current_frame_id;
rt_frame_data frames[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
} rt_vk_gpu;
#ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL
extern rt_vk_gpu g_gpu;
RT_INLINE rt_frame_data *rtGetFrameData(unsigned int frame_id) {
return &g_gpu.frames[frame_id % g_gpu.max_frames_in_flight];
}
#endif
/* Helper functions */
#define RT_VK_CHECK(expr) \
do { \
VkResult res = expr; \
if (res != VK_SUCCESS) { \
rtReportError("vk", "Vulkan command failed with error %u.\nCommand: %s", res, #expr); \
} \
} while (0)
VkFormat rtPixelFormatToVkFormat(rt_pixel_format format);
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);
VkQueue rtGetQueue(rt_gpu_queue queue);
uint32_t rtGetQueueFamily(rt_gpu_queue queue);
const char *rtVkFormatToString(VkFormat format);
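/* Illustrative usage sketch (not part of the original header): combining the queue
 * lookup helper with the RT_VK_CHECK macro declared above. */
RT_INLINE void rt_example_flush_graphics_queue(void) {
    VkQueue graphics = rtGetQueue(RT_GRAPHICS_QUEUE);
    if (graphics != VK_NULL_HANDLE)
        RT_VK_CHECK(vkQueueWaitIdle(graphics));
}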
#endif

View File

@ -1,192 +0,0 @@
#include "gpu.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 1024);
#define SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX 0xffffff
#define RENDER_FINISHED_SEMAPHORE_INDEX 0xfffffe
typedef struct rt_gpu_semaphore_s {
uint32_t version;
VkSemaphore semaphore[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
uint64_t current_value[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
/* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a
* not-signaled semaphore. */
struct rt_gpu_semaphore_s *next_free;
} rt_gpu_semaphore;
static rt_gpu_semaphore *_semaphores;
static rt_gpu_semaphore *_first_free;
static rt_mutex *_lock;
static void DestroySemaphore(rt_gpu_semaphore *s) {
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
vkDestroySemaphore(g_gpu.device, s->semaphore[i], g_gpu.alloc_cb);
s->semaphore[i] = VK_NULL_HANDLE;
}
rtLockMutex(_lock);
s->next_free = _first_free;
_first_free = s;
rtUnlockMutex(_lock);
}
rt_result InitializeSemaphoreManagement(void) {
_semaphores = calloc(rt_VkMaxSemaphores.i, sizeof(rt_gpu_semaphore));
if (!_semaphores)
return RT_OUT_OF_MEMORY;
_lock = rtCreateMutex();
if (!_lock) {
free(_semaphores);
return RT_UNKNOWN_ERROR;
}
/* Keep 0 unused for the invalid handle */
_first_free = &_semaphores[1];
for (int i = 1; i < rt_VkMaxSemaphores.i - 1; ++i)
_semaphores[i].next_free = &_semaphores[i + 1];
_semaphores[rt_VkMaxSemaphores.i - 1].next_free = NULL;
return RT_SUCCESS;
}
void ShutdownSemaphoreManagement(void) {
for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) {
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j)
vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore[j], g_gpu.alloc_cb);
}
}
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
const rt_gpu_semaphore_info *info,
rt_gpu_semaphore_handle *p_semaphores) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_gpu_semaphore *sem = _first_free;
if (sem)
_first_free = sem->next_free;
rtUnlockMutex(_lock);
if (!sem) {
for (uint32_t j = 0; j < i; ++j) {
uint32_t index = p_semaphores[j].index;
DestroySemaphore(&_semaphores[index]);
}
return RT_OUT_OF_MEMORY;
}
sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) {
VkSemaphoreTypeCreateInfo type_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
.initialValue = info[i].initial_value,
};
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &type_info,
};
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&sem->semaphore[j]) != VK_SUCCESS) {
for (uint32_t k = 0; k < i; ++k) {
uint32_t index = p_semaphores[k].index;
DestroySemaphore(&_semaphores[index]);
}
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
char name[128];
rtSPrint(name, 128, "%s (%u)", (info[i].name) ? info[i].name : "Unnamed Semaphore", j);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)sem->semaphore[j],
.objectType = VK_OBJECT_TYPE_SEMAPHORE,
.pObjectName = name,
};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
sem->current_value[j] = 0;
}
p_semaphores[i].version = (unsigned char)sem->version;
p_semaphores[i].index = (uint32_t)(sem - _semaphores);
}
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
for (uint32_t i = 0; i < count; ++i) {
uint32_t index = semaphores[i].index;
if (index >= (uint32_t)rt_VkMaxSemaphores.i)
continue;
if (semaphores[i].version != _semaphores[index].version) {
rtLog("vk",
"Tried to destroy semaphore %u with version %u, but the semaphore has version %u",
index,
semaphores[i].version,
_semaphores[index].version);
continue;
}
DestroySemaphore(&_semaphores[index]);
}
}
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) {
uint32_t index = handle.index;
if (index == SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX) {
rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
return fd->image_available;
} else if (index == RENDER_FINISHED_SEMAPHORE_INDEX) {
rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
return fd->render_finished;
}
if (!RT_IS_HANDLE_VALID(handle) || index >= (uint32_t)rt_VkMaxSemaphores.i)
return VK_NULL_HANDLE;
if (_semaphores[index].version != handle.version)
return VK_NULL_HANDLE;
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
return _semaphores[index].semaphore[frame];
}
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) {
uint32_t index = semaphore.index;
if (!RT_IS_HANDLE_VALID(semaphore) || index >= (uint32_t)rt_VkMaxSemaphores.i)
return 0;
if (_semaphores[index].version != semaphore.version)
return 0;
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
vkGetSemaphoreCounterValue(g_gpu.device,
_semaphores[index].semaphore[frame],
&_semaphores[index].current_value[frame]);
return _semaphores[index].current_value[frame];
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
return (rt_gpu_semaphore_handle){
.version = 1,
.index = SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX,
};
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
return (rt_gpu_semaphore_handle){
.version = 1,
.index = RENDER_FINISHED_SEMAPHORE_INDEX,
};
}
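/* Illustrative usage sketch (not part of the original file): resolving one of the
 * special per-frame handles into the VkSemaphore of the current frame and waiting
 * on it at submit time. The submit wiring shown here is an assumption made for
 * this example. */
static void ExampleWaitOnSwapchainImage(VkCommandBuffer cmd, VkQueue queue) {
    rt_gpu_semaphore_handle swapchain_available = {
        .version = 1,
        .index = SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX,
    };
    VkSemaphore image_available = rtGetSemaphore(swapchain_available);
    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    VkSubmitInfo submit = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .waitSemaphoreCount = 1,
        .pWaitSemaphores = &image_available,
        .pWaitDstStageMask = &wait_stage,
        .commandBufferCount = 1,
        .pCommandBuffers = &cmd,
    };
    vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);
}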

View File

@ -1,10 +0,0 @@
#ifndef RT_VK_GPU_SYNC_H
#define RT_VK_GPU_SYNC_H
#include <volk/volk.h>
#include "gfx/renderer_api.h"
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle);
#endif

View File

@ -1,97 +0,0 @@
#include "gpu.h"
VkFormat rtPixelFormatToVkFormat(rt_pixel_format format) {
switch (format) {
case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
return VK_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
return VK_FORMAT_B8G8R8A8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
return VK_FORMAT_R8G8B8A8_SRGB;
case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_SRGB;
case RT_PIXEL_FORMAT_R8G8B8_UNORM:
return VK_FORMAT_R8G8B8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8_UNORM:
return VK_FORMAT_B8G8R8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_SRGB;
case RT_PIXEL_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_SRGB;
case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
return VK_FORMAT_D24_UNORM_S8_UINT;
case RT_PIXEL_FORMAT_DEPTH32:
return VK_FORMAT_D32_SFLOAT;
default:
return VK_FORMAT_UNDEFINED;
}
}
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count) {
/* Limit to what the gpu supports */
VkSampleCountFlags counts = g_gpu.phys_device_props.limits.framebufferColorSampleCounts &
g_gpu.phys_device_props.limits.framebufferDepthSampleCounts &
g_gpu.phys_device_props.limits.sampledImageColorSampleCounts &
g_gpu.phys_device_props.limits.sampledImageDepthSampleCounts;
while (count > 1) {
if ((counts & count) == 0)
count >>= 1;
else
break;
}
return (VkSampleCountFlagBits)count;
}
VkQueue rtGetQueue(rt_gpu_queue queue) {
switch (queue) {
case RT_GRAPHICS_QUEUE:
return g_gpu.graphics_queue;
case RT_COMPUTE_QUEUE:
return g_gpu.compute_queue;
case RT_TRANSFER_QUEUE:
return g_gpu.transfer_queue;
default:
return VK_NULL_HANDLE;
}
}
uint32_t rtGetQueueFamily(rt_gpu_queue queue) {
switch (queue) {
case RT_GRAPHICS_QUEUE:
return g_gpu.graphics_family;
case RT_COMPUTE_QUEUE:
return g_gpu.compute_family;
case RT_TRANSFER_QUEUE:
return g_gpu.transfer_family;
default:
return UINT32_MAX;
}
}
const char *rtVkFormatToString(VkFormat format) {
switch (format) {
case VK_FORMAT_R8G8B8A8_UNORM:
return "R8G8B8A8_UNORM";
case VK_FORMAT_B8G8R8A8_UNORM:
return "B8G8R8A8_UNORM";
case VK_FORMAT_R8G8B8A8_SRGB:
return "R8G8B8A8_SRGB";
case VK_FORMAT_B8G8R8A8_SRGB:
return "B8G8R8A8_SRGB";
case VK_FORMAT_R8G8B8_UNORM:
return "R8G8B8_UNORM";
case VK_FORMAT_B8G8R8_UNORM:
return "B8G8R8_UNORM";
case VK_FORMAT_R8G8B8_SRGB:
return "R8G8B8_SRGB";
case VK_FORMAT_B8G8R8_SRGB:
return "B8G8R8_SRGB";
case VK_FORMAT_D24_UNORM_S8_UINT:
return "D24_UNORM_S8_UINT";
case VK_FORMAT_D32_SFLOAT:
return "D32_SFLOAT";
default:
return "UNDEFINED";
}
}

View File

@ -1,737 +0,0 @@
#include <malloc.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#define RT_VK_DONT_DEFINE_GPU_GLOBAL
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "runtime/config.h"
#include "runtime/runtime.h"
#include "gfx/renderer_api.h"
#define TARGET_API_VERSION VK_API_VERSION_1_3
RT_CVAR_I(r_VkEnableAPIAllocTracking,
"Enable tracking of allocations done by the vulkan api. [0/1] Default: 0",
0);
RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");
RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);
rt_vk_gpu g_gpu;
static VkAllocationCallbacks _tracking_alloc_cbs;
static const char *AllocationScopeToString(VkSystemAllocationScope scope) {
switch (scope) {
case VK_SYSTEM_ALLOCATION_SCOPE_COMMAND:
return "COMMAND";
case VK_SYSTEM_ALLOCATION_SCOPE_OBJECT:
return "OBJECT";
case VK_SYSTEM_ALLOCATION_SCOPE_CACHE:
return "CACHE";
case VK_SYSTEM_ALLOCATION_SCOPE_DEVICE:
return "DEVICE";
case VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE:
return "INSTANCE";
default:
return "UNKNOWN";
}
}
static void *
TrackAllocation(void *userData, size_t size, size_t alignment, VkSystemAllocationScope scope) {
rtLog("vk",
"Allocation. Size: %zu, Alignment: %zu, Scope: %s",
size,
alignment,
AllocationScopeToString(scope));
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
return aligned_alloc(alignment, size);
#endif
}
static void *TrackReallocation(void *userData,
void *original,
size_t size,
size_t alignment,
VkSystemAllocationScope scope) {
rtLog("vk",
"Reallocation. Size: %zu, Alignment: %zu, Scope: %s",
size,
alignment,
AllocationScopeToString(scope));
#ifdef _WIN32
/* Memory from TrackAllocation comes from _aligned_malloc and must stay in the
 * _aligned_* family. */
return _aligned_realloc(original, size, alignment);
#else
return realloc(original, size);
#endif
}
static void TrackFree(void *userData, void *memory) {
#ifdef _WIN32
_aligned_free(memory);
#else
free(memory);
#endif
}
static VkBool32 VKAPI_PTR
DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT types,
const VkDebugUtilsMessengerCallbackDataEXT *callbackData,
void *userData) {
if (severity < VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
return VK_FALSE;
const char *severity_str = "<UNKNOWN>";
if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
severity_str = "WARNING";
else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
severity_str = "ERROR";
rtLog("vk", "[%s] %s", severity_str, callbackData->pMessage);
if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
RT_DEBUGBREAK;
return VK_FALSE;
}
extern rt_cvar r_VkPreferredSwapchainImages;
extern rt_cvar r_VkPreferMailboxMode;
extern rt_cvar r_VkMaxPipelineCount;
void RT_RENDERER_API_FN(RegisterCVars)(void) {
rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
rtRegisterCVAR(&r_VkPhysDeviceName);
rtRegisterCVAR(&r_VkPreferredSwapchainImages);
rtRegisterCVAR(&r_VkPreferMailboxMode);
rtRegisterCVAR(&r_VkMaxFramesInFlight);
rtRegisterCVAR(&r_VkMaxPipelineCount);
}
static rt_result CreateInstance(void) {
VkResult result = volkInitialize();
if (result != VK_SUCCESS) {
rtReportError("vk", "Initialization failed: volkInitialize()");
return 1;
}
VkApplicationInfo app_info = {
.apiVersion = TARGET_API_VERSION,
.applicationVersion = 0x00001000,
.engineVersion = 0x00001000,
.pEngineName = "voyageEngine",
.pApplicationName = "Voyage",
};
const char *extensions[] = {
VK_KHR_SURFACE_EXTENSION_NAME,
#ifdef _WIN32
"VK_KHR_win32_surface",
#elif defined(RT_USE_XLIB)
"VK_KHR_xlib_surface",
#endif
#ifdef RT_DEBUG
VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
#endif
};
const char *layers[1];
unsigned int layer_count = 0;
#ifdef RT_DEBUG
/* Search for layers we want to enable */
uint32_t available_layer_count = 0;
result = vkEnumerateInstanceLayerProperties(&available_layer_count, NULL);
if (result == VK_SUCCESS) {
VkLayerProperties *props = calloc(available_layer_count, sizeof(VkLayerProperties));
if (props) {
vkEnumerateInstanceLayerProperties(&available_layer_count, props);
for (uint32_t i = 0; i < available_layer_count; ++i) {
if (strcmp(props[i].layerName, "VK_LAYER_KHRONOS_validation") == 0) {
layers[0] = "VK_LAYER_KHRONOS_validation";
layer_count = 1;
break;
}
}
free(props);
} else {
rtLog("vk", "Failed to allocate storage for instance layer properties.");
}
} else {
rtLog("vk", "vkEnumerateInstanceLayerProperties failed.");
}
#endif
VkInstanceCreateInfo instance_info = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pApplicationInfo = &app_info,
.ppEnabledExtensionNames = extensions,
.enabledExtensionCount = RT_ARRAY_COUNT(extensions),
.ppEnabledLayerNames = layers,
.enabledLayerCount = layer_count,
};
result = vkCreateInstance(&instance_info, g_gpu.alloc_cb, &g_gpu.instance);
if (result != VK_SUCCESS) {
rtReportError("vk", "Failed to create the vulkan instance.");
return 1;
}
volkLoadInstance(g_gpu.instance);
#ifdef RT_DEBUG
/* Create the debug utils messenger */
VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
.pfnUserCallback = DebugUtilsMessengerCb,
};
vkCreateDebugUtilsMessengerEXT(g_gpu.instance,
&messenger_info,
g_gpu.alloc_cb,
&g_gpu.messenger);
#endif
return RT_SUCCESS;
}
static rt_result CreateSurface(const rt_renderer_init_info *info) {
#ifdef _WIN32
g_gpu.native_window.hInstance = info->hInstance;
g_gpu.native_window.hWnd = info->hWnd;
VkWin32SurfaceCreateInfoKHR surface_info = {
.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
.hinstance = info->hInstance,
.hwnd = info->hWnd,
};
if (vkCreateWin32SurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
VK_SUCCESS)
return RT_SUCCESS;
else
return 100;
#elif defined(RT_USE_XLIB)
g_gpu.native_window.display = info->display;
g_gpu.native_window.window = info->window;
VkXlibSurfaceCreateInfoKHR surface_info = {
.sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
.dpy = info->display,
.window = info->window,
};
if (vkCreateXlibSurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
VK_SUCCESS)
return RT_SUCCESS;
else
return 100;
#endif
}
typedef struct {
uint32_t graphics;
uint32_t compute;
uint32_t present;
uint32_t transfer;
} rt_queue_indices;
static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
rt_queue_indices indices = {.graphics = UINT32_MAX,
.compute = UINT32_MAX,
.present = UINT32_MAX,
.transfer = UINT32_MAX};
uint32_t count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
VkQueueFamilyProperties *props = calloc(count, sizeof(VkQueueFamilyProperties));
if (!props) {
return indices;
}
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, props);
for (uint32_t i = 0; i < count; ++i) {
if (props[i].queueCount == 0)
continue;
if ((props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
indices.graphics = i;
if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
indices.compute = i;
if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
indices.transfer = i;
VkBool32 present_supported = VK_FALSE;
vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
if (present_supported)
indices.present = i;
}
if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
indices.transfer = indices.graphics;
else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
indices.transfer = indices.compute;
free(props);
return indices;
}
static bool CheckDeviceExtensionSupported(VkPhysicalDevice phys_dev) {
const char *required_extensions[] = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
};
uint32_t extension_count;
vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, NULL);
VkExtensionProperties *supported_extensions =
calloc(extension_count, sizeof(VkExtensionProperties));
if (!supported_extensions)
return false;
vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, supported_extensions);
bool supported = true;
for (uint32_t i = 0; i < RT_ARRAY_COUNT(required_extensions); ++i) {
bool found = false;
for (uint32_t j = 0; j < extension_count; ++j) {
if (strncmp(supported_extensions[j].extensionName,
required_extensions[i],
VK_MAX_EXTENSION_NAME_SIZE) == 0) {
found = true;
break;
}
}
if (!found) {
supported = false;
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(phys_dev, &props);
rtLog("Device %s does not support the required extension %s",
props.deviceName,
required_extensions[i]);
goto out;
}
}
out:
free(supported_extensions);
return supported;
}
static rt_result ChoosePhysicalDevice(void) {
g_gpu.phys_device = VK_NULL_HANDLE;
uint32_t phys_device_count = 0;
VkResult result = vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, NULL);
if (result != VK_SUCCESS) {
rtReportError("vk", "Failed to enumerate the physical devices.");
return 2;
}
VkPhysicalDevice *phys_devices = calloc(phys_device_count, sizeof(VkPhysicalDevice));
if (!phys_devices) {
rtReportError("vk", "Failed to enumerate the physical devices: Out of memory.");
return 2;
}
vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, phys_devices);
uint32_t highscore = 0;
uint32_t best_index = phys_device_count;
for (uint32_t i = 0; i < phys_device_count; ++i) {
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
.pNext = &timeline_semaphore_features,
};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
.pNext = &synchronization2_features,
};
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
.pNext = &dynamic_rendering_features,
};
VkPhysicalDeviceFeatures2 features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &descriptor_indexing_features,
};
vkGetPhysicalDeviceFeatures2(phys_devices[i], &features);
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
.pNext = NULL,
};
VkPhysicalDeviceProperties2 props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &descriptor_indexing_props,
};
vkGetPhysicalDeviceProperties2(phys_devices[i], &props);
if (!CheckDeviceExtensionSupported(phys_devices[i]))
continue;
rt_queue_indices indices = RetrieveQueueIndices(phys_devices[i], g_gpu.surface);
if (indices.compute == UINT32_MAX || indices.present == UINT32_MAX ||
indices.graphics == UINT32_MAX)
continue;
if (!synchronization2_features.synchronization2 ||
!dynamic_rendering_features.dynamicRendering ||
!timeline_semaphore_features.timelineSemaphore)
continue;
/* Check for bindless support */
if (!descriptor_indexing_features.runtimeDescriptorArray ||
!descriptor_indexing_features.descriptorBindingPartiallyBound)
continue;
uint32_t score = 0;
if (props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
score += 100;
score += (props.properties.limits.maxFramebufferWidth / 100) *
(props.properties.limits.maxFramebufferHeight / 100);
score +=
(descriptor_indexing_props.shaderStorageBufferArrayNonUniformIndexingNative) ? 100 : 0;
score +=
(descriptor_indexing_props.shaderSampledImageArrayNonUniformIndexingNative) ? 100 : 0;
if (score > highscore) {
highscore = score;
best_index = i;
}
if (strncmp(props.properties.deviceName,
r_VkPhysDeviceName.s,
VK_MAX_PHYSICAL_DEVICE_NAME_SIZE) == 0) {
best_index = i;
break;
}
}
if (best_index < phys_device_count) {
g_gpu.phys_device = phys_devices[best_index];
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
.pNext = NULL,
};
VkPhysicalDeviceProperties2 props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &descriptor_indexing_props,
};
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
};
VkPhysicalDeviceFeatures2 features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &descriptor_indexing_features,
};
vkGetPhysicalDeviceFeatures2(phys_devices[best_index], &features);
vkGetPhysicalDeviceProperties2(phys_devices[best_index], &props);
g_gpu.phys_device_props = props.properties;
g_gpu.descriptor_indexing_props = descriptor_indexing_props;
g_gpu.phys_device_features = features.features;
g_gpu.descriptor_indexing_features = descriptor_indexing_features;
}
free(phys_devices);
if (g_gpu.phys_device == VK_NULL_HANDLE) {
rtReportError("vk", "Failed to find a suitable physical device.");
return 3;
}
return RT_SUCCESS;
}
static rt_result CreateDevice(void) {
const char *extensions[] = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
};
rt_queue_indices queue_indices = RetrieveQueueIndices(g_gpu.phys_device, g_gpu.surface);
g_gpu.compute_family = queue_indices.compute;
g_gpu.graphics_family = queue_indices.graphics;
g_gpu.present_family = queue_indices.present;
g_gpu.transfer_family = queue_indices.transfer;
float priority = 1.f;
uint32_t distinct_queue_count = 1;
VkDeviceQueueCreateInfo queue_info[4];
queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[0].pNext = NULL;
queue_info[0].flags = 0;
queue_info[0].queueCount = 1;
queue_info[0].queueFamilyIndex = queue_indices.graphics;
queue_info[0].pQueuePriorities = &priority;
if (queue_indices.compute != queue_indices.graphics) {
queue_info[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[1].pNext = NULL;
queue_info[1].flags = 0;
queue_info[1].queueCount = 1;
queue_info[1].queueFamilyIndex = queue_indices.compute;
queue_info[1].pQueuePriorities = &priority;
++distinct_queue_count;
}
if (queue_indices.present != queue_indices.graphics &&
queue_indices.present != queue_indices.compute) {
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[distinct_queue_count].pNext = NULL;
queue_info[distinct_queue_count].flags = 0;
queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
}
if (queue_indices.transfer != queue_indices.graphics &&
queue_indices.transfer != queue_indices.compute) {
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[distinct_queue_count].pNext = NULL;
queue_info[distinct_queue_count].flags = 0;
queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
}
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
.pNext = &timeline_semaphore_features,
};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
.pNext = &synchronization2_features,
};
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
.pNext = &dynamic_rendering_features,
};
VkPhysicalDeviceFeatures2 features = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &indexing_features};
vkGetPhysicalDeviceFeatures2(g_gpu.phys_device, &features);
RT_ASSERT(indexing_features.runtimeDescriptorArray &&
indexing_features.descriptorBindingPartiallyBound,
"We require a device that supports bindless vulkan.");
VkDeviceCreateInfo device_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = &features,
.enabledExtensionCount = RT_ARRAY_COUNT(extensions),
.ppEnabledExtensionNames = extensions,
.pQueueCreateInfos = queue_info,
.queueCreateInfoCount = distinct_queue_count,
};
if (vkCreateDevice(g_gpu.phys_device, &device_info, g_gpu.alloc_cb, &g_gpu.device) !=
VK_SUCCESS) {
rtReportError("vk", "Device creation failed.");
return 10;
}
vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.transfer, 0, &g_gpu.transfer_queue);
return RT_SUCCESS;
}
static rt_result CreateAllocator(void) {
#define SET_FNC(name) fncs.name = name
#define SET_KHR_FNC(name) (fncs).name##KHR = name
VmaVulkanFunctions fncs = {NULL};
SET_FNC(vkGetInstanceProcAddr);
SET_FNC(vkGetDeviceProcAddr);
SET_FNC(vkGetPhysicalDeviceProperties);
SET_FNC(vkGetPhysicalDeviceMemoryProperties);
SET_FNC(vkAllocateMemory);
SET_FNC(vkFreeMemory);
SET_FNC(vkMapMemory);
SET_FNC(vkUnmapMemory);
SET_FNC(vkFlushMappedMemoryRanges);
SET_FNC(vkInvalidateMappedMemoryRanges);
SET_FNC(vkBindBufferMemory);
SET_FNC(vkBindImageMemory);
SET_FNC(vkGetBufferMemoryRequirements);
SET_FNC(vkGetImageMemoryRequirements);
SET_FNC(vkCreateBuffer);
SET_FNC(vkDestroyBuffer);
SET_FNC(vkCreateImage);
SET_FNC(vkDestroyImage);
SET_FNC(vkCmdCopyBuffer);
SET_KHR_FNC(vkGetBufferMemoryRequirements2);
SET_KHR_FNC(vkGetImageMemoryRequirements2);
SET_KHR_FNC(vkBindBufferMemory2);
SET_KHR_FNC(vkBindImageMemory2);
SET_KHR_FNC(vkGetPhysicalDeviceMemoryProperties2);
SET_FNC(vkGetDeviceBufferMemoryRequirements);
SET_FNC(vkGetDeviceImageMemoryRequirements);
#undef SET_FNC
#undef SET_KHR_FNC
VmaAllocatorCreateInfo allocator_info = {
.instance = g_gpu.instance,
.physicalDevice = g_gpu.phys_device,
.device = g_gpu.device,
.pAllocationCallbacks = g_gpu.alloc_cb,
.vulkanApiVersion = TARGET_API_VERSION,
.pVulkanFunctions = &fncs,
};
return vmaCreateAllocator(&allocator_info, &g_gpu.allocator) == VK_SUCCESS ? RT_SUCCESS
: RT_UNKNOWN_ERROR;
}
static void DestroyAllocator(void) {
vmaDestroyAllocator(g_gpu.allocator);
}
static rt_result CreatePerFrameObjects(void) {
for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].render_finished) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].image_available) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].swapchain_transitioned) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
char name[128];
rtSPrint(name, 128, "Render Finished Semaphore (%u)", i);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)g_gpu.frames[i].render_finished,
.objectType = VK_OBJECT_TYPE_SEMAPHORE,
.pObjectName = name,
};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 128, "Image Available Semaphore (%u)", i);
name_info.objectHandle = (uint64_t)g_gpu.frames[i].image_available;
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 128, "Swapchain Transitioned Semaphore (%u)", i);
name_info.objectHandle = (uint64_t)g_gpu.frames[i].swapchain_transitioned;
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
}
return RT_SUCCESS;
}
void DestroyPerFrameObjects(void) {
for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].image_available, g_gpu.alloc_cb);
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].render_finished, g_gpu.alloc_cb);
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].swapchain_transitioned, g_gpu.alloc_cb);
}
}
extern rt_result InitPipelineManagement(void);
extern void ShutdownPipelineManagement(void);
extern rt_result InitRenderTargetManagement(void);
extern void ShutdownRenderTargetManagement(void);
extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
extern rt_result InitializeSemaphoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
extern rt_result InitBufferManagement(void);
extern void ShutdownBufferManagement(void);
extern rt_result InitializeTransfers(void);
extern void ShutdownTransfers(void);
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
rtLog("vk", "Init");
_tracking_alloc_cbs.pUserData = NULL;
_tracking_alloc_cbs.pfnAllocation = TrackAllocation;
_tracking_alloc_cbs.pfnReallocation = TrackReallocation;
_tracking_alloc_cbs.pfnFree = TrackFree;
if (r_VkEnableAPIAllocTracking.i) {
g_gpu.alloc_cb = &_tracking_alloc_cbs;
} else {
g_gpu.alloc_cb = NULL;
}
g_gpu.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i,
RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT,
RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT);
int res = CreateInstance();
if (res != RT_SUCCESS)
return res;
res = CreateSurface(info);
if (res != RT_SUCCESS)
return res;
res = ChoosePhysicalDevice();
if (res != RT_SUCCESS)
return res;
res = CreateDevice();
if (res != RT_SUCCESS)
return res;
res = CreateAllocator();
if (res != RT_SUCCESS)
return res;
res = CreatePerFrameObjects();
if (res != RT_SUCCESS)
return res;
res = InitPipelineManagement();
if (res != RT_SUCCESS)
return res;
res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitializeSemaphoreManagement();
if (res != RT_SUCCESS)
return res;
res = InitCommandBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitializeTransfers();
if (res != RT_SUCCESS)
return res;
res = rtCreateSwapchain();
if (res != RT_SUCCESS)
return res;
rtUpdateSwapchainRenderTarget();
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(Shutdown)(void) {
rtLog("vk", "Shutdown");
vkDeviceWaitIdle(g_gpu.device);
rtDestroySwapchain();
ShutdownTransfers();
ShutdownBufferManagement();
ShutdownCommandBufferManagement();
ShutdownSemaphoreManagement();
ShutdownRenderTargetManagement();
ShutdownPipelineManagement();
DestroyPerFrameObjects();
DestroyAllocator();
vkDestroyDevice(g_gpu.device, g_gpu.alloc_cb);
vkDestroySurfaceKHR(g_gpu.instance, g_gpu.surface, g_gpu.alloc_cb);
#ifdef RT_DEBUG
vkDestroyDebugUtilsMessengerEXT(g_gpu.instance, g_gpu.messenger, g_gpu.alloc_cb);
#endif
vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb);
}
unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return g_gpu.max_frames_in_flight;
}

View File

@ -1,56 +0,0 @@
if vk_dep.found()
platform_defs = []
if get_option('use_xlib')
platform_defs = ['-DVK_USE_PLATFORM_XLIB_KHR']
elif host_machine.system() == 'windows'
platform_defs = ['-DVK_USE_PLATFORM_WIN32_KHR']
endif
vk_inc_dep = vk_dep.partial_dependency(compile_args : true, includes : true)
vk_renderer_lib = library('rtvk',
# Project Sources
'command_buffers.h',
'gpu.h',
'gpu_sync.h',
'pipelines.h',
'render_targets.h',
'swapchain.h',
'transfers.h',
'../common/common_render_graph.h',
'buffers.c',
'command_buffers.c',
'commands.c',
'frame.c',
'gpu_sync.c',
'helper.c',
'init.c',
'pipelines.c',
'render_graph.c',
'render_targets.c',
'swapchain.c',
'transfers.c',
'simple_sync_impl.cpp',
'../common/common_render_graph.c',
# Contrib Sources
'../../../contrib/volk/volk.h',
'../../../contrib/volk/volk.c',
'../../../contrib/vma/vk_mem_alloc.h',
'vma_impl.cpp',
dependencies : [m_dep, vk_inc_dep, windowing_dep],
include_directories : [engine_incdir, contrib_incdir],
link_with : [runtime_lib],
c_pch : 'pch/vk_pch.h',
c_args : platform_defs,
cpp_pch : 'pch/vk_pch.hpp',
cpp_args : platform_defs,
install : true)
engine_libs += vk_renderer_lib
engine_lib_paths += vk_renderer_lib.full_path()
endif

View File

@ -1,22 +0,0 @@
#include <volk/volk.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#endif
#if defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif
#include <stdlib.h>
#include <string.h>
/* GFX */
#include "gfx/gfx.h"
/* Commonly used runtime headers */
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "runtime/threading.h"

View File

@ -1,3 +0,0 @@
extern "C" {
#include "vk_pch.h"
}

View File

@ -1,186 +0,0 @@
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/resources.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include "gfx/effect.h"
#include "gpu.h"
#include "pipelines.h"
#include <stdlib.h>
#include <volk/volk.h>
RT_CVAR_I(r_VkMaxPipelineCount, "Maximum number of pipeline objects. Default: 1024", 1024);
typedef struct rt_pipeline_s {
uint32_t version;
rt_pipeline pipeline;
struct rt_pipeline_s *next_free;
} rt_pipeline_slot;
static rt_pipeline_slot *_pipelines;
static rt_pipeline_slot *_first_free;
static rt_rwlock _lock;
static void DestroyPipeline(rt_pipeline_slot *slot) {
if (slot->pipeline.pipeline) {
vkDestroyPipeline(g_gpu.device, slot->pipeline.pipeline, g_gpu.alloc_cb);
}
slot->next_free = _first_free;
_first_free = slot;
}
static VkShaderModule CreateShaderModuleFromResource(rt_resource_id rid) {
if (rid == RT_INVALID_RESOURCE_ID)
return VK_NULL_HANDLE;
rt_resource *resource = NULL;
size_t size = rtGetResourceSize(rid);
if (size == 0)
return VK_NULL_HANDLE;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return VK_NULL_HANDLE;
VkShaderModule module = VK_NULL_HANDLE;
resource = rtArenaPush(temp.arena, size);
if (!resource) {
rtLog("VK", "Failed to allocate temporary memory for retrieving a shader resource");
goto out;
}
if (rtGetResource(rid, resource) != RT_SUCCESS) {
goto out;
}
if (resource->type != RT_RESOURCE_SHADER) {
rtLog("VK", "Attempted to create a shader module from a non-shader resource %llx", rid);
goto out;
}
rt_shader_info *info = resource->data;
if (!info) {
rtLog("VK", "Shader resource %llx has no attached shader_info", rid);
goto out;
}
VkShaderModuleCreateInfo module_info = {.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pCode = rtResolveRelptr(&info->bytecode),
.codeSize = info->bytecode_length};
if (vkCreateShaderModule(g_gpu.device, &module_info, g_gpu.alloc_cb, &module) != VK_SUCCESS) {
rtLog("VK", "Failed to create the shader module from resource %llx", rid);
goto out;
}
out:
rtReturnTemporaryArena(temp);
return module;
}
static bool CreateComputePipeline(VkShaderModule compute_shader,
const rt_pipeline_info *info,
rt_pipeline_slot *slot) {
return false;
}
static bool CreateGraphicsPipeline(VkShaderModule vertex_shader,
VkShaderModule fragment_shader,
const rt_pipeline_info *info,
rt_pipeline_slot *slot) {
return false;
}
rt_result InitPipelineManagement(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_lock = lock_res.lock;
_pipelines = calloc(r_VkMaxPipelineCount.i, sizeof(rt_pipeline_slot));
if (!_pipelines) {
rtDestroyRWLock(&_lock);
return RT_OUT_OF_MEMORY;
}
/* Keep [0] unused to preserve 0 as the invalid handle */
_first_free = &_pipelines[1];
for (int i = 1; i < r_VkMaxPipelineCount.i - 1; ++i) {
_pipelines[i].next_free = &_pipelines[i + 1];
}
return RT_SUCCESS;
}
void ShutdownPipelineManagement(void) {
for (int i = 1; i < r_VkMaxPipelineCount.i; ++i) {
DestroyPipeline(&_pipelines[i]);
}
free(_pipelines);
rtDestroyRWLock(&_lock);
_first_free = NULL;
}
rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
rt_pipeline_handle handle = RT_INVALID_HANDLE;
rtLockWrite(&_lock);
if (!_first_free) {
rtLog("VK", "No free pipeline slots!");
rtUnlockWrite(&_lock);
return handle;
}
rt_pipeline_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
/* No other thread that calls compile gets the same slot.
* Another thread accessing the slot via GetPipeline would get a version mismatch.
* The same holds for DestroyPipeline
*/
rtUnlockWrite(&_lock);
VkShaderModule vertex_shader = CreateShaderModuleFromResource(info->vertex_shader);
VkShaderModule fragment_shader = CreateShaderModuleFromResource(info->fragment_shader);
VkShaderModule compute_shader = CreateShaderModuleFromResource(info->compute_shader);
RT_UNUSED(vertex_shader);
RT_UNUSED(fragment_shader);
RT_UNUSED(compute_shader);
bool create_success = false;
if (compute_shader) {
create_success = CreateComputePipeline(compute_shader, info, slot);
} else if (vertex_shader && fragment_shader) {
create_success = CreateGraphicsPipeline(vertex_shader, fragment_shader, info, slot);
} else {
rtLog("VK", "Invalid combination of shaders in pipeline info.");
}
if (create_success) {
handle.version = slot->version;
handle.index = (uint32_t)(slot - _pipelines);
}
return handle;
}
void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
return;
rtLockWrite(&_lock);
if (_pipelines[handle.index].version == handle.version)
DestroyPipeline(&_pipelines[handle.index]);
else
rtLog("VK", "Tried to destroy a pipeline using an outdated handle.");
rtUnlockWrite(&_lock);
}
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
return NULL;
rtLockRead(&_lock);
rt_pipeline *res = NULL;
if (_pipelines[handle.index].version == handle.version)
res = &_pipelines[handle.index].pipeline;
else
rtLog("VK", "Tried to access a pipeline using an outdated handle.");
rtUnlockRead(&_lock);
return res;
}

View File

@ -1,15 +0,0 @@
#ifndef RT_VK_PIPELINES_H
#define RT_VK_PIPELINES_H
#include <volk/volk.h>
#include "gfx/renderer_api.h"
typedef struct {
VkPipeline pipeline;
} rt_pipeline;
/* A pipeline is immutable after creation. */
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);
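/* Illustrative usage sketch (not part of the original header): looking up a pipeline
 * by handle before recording. An outdated handle makes rtGetPipeline return NULL, so
 * the bind is skipped; the graphics bind point is an assumption for this example. */
static inline void rt_example_bind_pipeline(VkCommandBuffer cmd, rt_pipeline_handle handle) {
    const rt_pipeline *pipeline = rtGetPipeline(handle);
    if (pipeline && pipeline->pipeline != VK_NULL_HANDLE)
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline);
}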
#endif

View File

@ -1,27 +0,0 @@
#include "gpu.h"
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
#include "../common/common_render_graph.h"
#include "render_targets.h"
static int RequireExplicitSynchronization(void) {
return 1;
}
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = rtCreateRenderTarget,
.RequireExplicitSynchronization =
RequireExplicitSynchronization};
return rtCreateRenderGraphBuilder(&cbs);
}
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
RT_NOT_IMPLEMENTED;
return RT_UNKNOWN_ERROR;
}

View File

@ -1,400 +0,0 @@
#include "runtime/config.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include <stdlib.h>
#include <volk/volk.h>
RT_CVAR_I(r_VkMaxRenderTargetCount, "Maximum number of render target objects. Default: 1024", 1024);
typedef struct rt_render_target_slot_s {
uint32_t version;
rt_render_target render_target;
struct rt_render_target_slot_s *next_free;
} rt_render_target_slot;
static rt_render_target_slot *_render_targets;
static rt_render_target_slot *_first_free;
static rt_rwlock _lock;
static rt_render_target_handle _swapchain_handle;
static void DestroyRenderTarget(rt_render_target_slot *slot) {
for (unsigned int i = 0; i < slot->render_target.image_count; ++i) {
vkDestroyImageView(g_gpu.device, slot->render_target.view[i], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
slot->render_target.image[i],
slot->render_target.allocation[i]);
}
slot->next_free = _first_free;
_first_free = slot;
}
static bool CreateImageAndView(VkExtent2D extent,
VkFormat format,
VkSampleCountFlagBits sample_count,
VkImageUsageFlagBits usage,
VkImageAspectFlagBits aspect,
VkImage *p_image,
VmaAllocation *p_allocation,
VkImageView *p_view,
const char *rt_name,
uint32_t image_index) {
uint32_t queue_families[3];
uint32_t distinct_queue_families = 1;
queue_families[0] = g_gpu.graphics_family;
if (g_gpu.compute_family != g_gpu.graphics_family)
queue_families[distinct_queue_families++] = g_gpu.compute_family;
if (g_gpu.present_family != g_gpu.graphics_family &&
g_gpu.present_family != g_gpu.compute_family)
queue_families[distinct_queue_families++] = g_gpu.present_family;
VkFormatProperties2 props = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
};
vkGetPhysicalDeviceFormatProperties2(g_gpu.phys_device, format, &props);
if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) == 0) {
rtLog("vk",
"Requested render target format %s can not be sampled.",
rtVkFormatToString(format));
usage &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
}
if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) == 0) {
rtLog("vk",
"Requested render target format %s can not be used for storage.",
rtVkFormatToString(format));
usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
}
if (((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0) &&
((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) ==
0)) {
rtReportError(
"vk",
"Tried to create a render target color attachment, but the format %s does not "
"support the color attachment usage.",
rtVkFormatToString(format));
return false;
} else if (((usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0) &&
((props.formatProperties.optimalTilingFeatures &
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) == 0)) {
rtReportError("vk",
"Tried to create a render target depth/stencil attachment, but the format %s"
"does not support the depth/stencil attachment usage.",
rtVkFormatToString(format));
return false;
}
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = format,
.extent = {.width = extent.width, .height = extent.height, .depth = 1},
.mipLevels = 1,
.arrayLayers = 1,
.samples = sample_count,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = usage,
.sharingMode =
(distinct_queue_families > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
.pQueueFamilyIndices = (distinct_queue_families > 1) ? queue_families : NULL,
.queueFamilyIndexCount = distinct_queue_families,
};
VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_GPU_ONLY,
};
VkImage image;
VmaAllocation allocation;
if (vmaCreateImage(g_gpu.allocator, &image_info, &alloc_info, &image, &allocation, NULL) !=
VK_SUCCESS) {
return false;
}
VkImageViewCreateInfo view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = format,
.components = {.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY},
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkImageView view;
if (vkCreateImageView(g_gpu.device, &view_info, g_gpu.alloc_cb, &view) != VK_SUCCESS) {
rtLog("VK", "Failed to create render target image view");
vmaDestroyImage(g_gpu.allocator, image, allocation);
return false;
}
#ifdef RT_DEBUG
char name[260];
rtSPrint(name, 260, "%s (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)image,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 260, "%s [view] (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
name_info =
(VkDebugUtilsObjectNameInfoEXT){.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)view,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE_VIEW};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
*p_image = image;
*p_allocation = allocation;
*p_view = view;
return true;
}
rt_result InitRenderTargetManagement(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_lock = lock_res.lock;
_render_targets = calloc(r_VkMaxRenderTargetCount.i, sizeof(rt_render_target_slot));
if (!_render_targets) {
rtDestroyRWLock(&_lock);
return RT_OUT_OF_MEMORY;
}
/* Keep [0] unused to preserve 0 as the invalid handle */
_first_free = &_render_targets[1];
for (int i = 1; i < r_VkMaxRenderTargetCount.i - 1; ++i) {
_render_targets[i].next_free = &_render_targets[i + 1];
}
/* Reserve the slot for the swap chain rt */
rt_render_target_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
_swapchain_handle = (rt_render_target_handle){.version = slot->version,
.index = (uint32_t)(slot - _render_targets)};
return RT_SUCCESS;
}
void ShutdownRenderTargetManagement(void) {
for (int i = 1; i < r_VkMaxRenderTargetCount.i; ++i) {
DestroyRenderTarget(&_render_targets[i]);
}
free(_render_targets);
rtDestroyRWLock(&_lock);
_first_free = NULL;
}
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info) {
rt_render_target_handle handle = {0};
rtLockWrite(&_lock);
if (!_first_free) {
rtLog("VK", "No free render target slots!");
rtUnlockWrite(&_lock);
return handle;
}
rt_render_target_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
/* No other thread that calls rtCreateRenderTarget gets the same slot.
 * Another thread accessing the slot via rtGetRenderTarget would get a version mismatch.
 * The same holds for rtDestroyRenderTarget.
 */
rtUnlockWrite(&_lock);
const char *name = info->name;
slot->render_target.match_swapchain = 0;
slot->render_target.image_count = g_swapchain.image_count;
for (unsigned int i = 0; i < g_swapchain.image_count; ++i) {
uint32_t width = info->width, height = info->height;
if (width == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
width = g_swapchain.extent.width;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
}
if (height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
height = g_swapchain.extent.height;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
}
slot->render_target.extent = (VkExtent2D){.width = width, .height = height};
if (info->format != RT_PIXEL_FORMAT_SWAPCHAIN)
slot->render_target.format = rtPixelFormatToVkFormat(info->format);
else {
slot->render_target.format = g_swapchain.format;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT;
}
if (info->format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 ||
info->format == RT_PIXEL_FORMAT_DEPTH32) {
slot->render_target.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
if (info->format == RT_PIXEL_FORMAT_DEPTH32)
slot->render_target.aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
else
slot->render_target.aspect =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
} else {
slot->render_target.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
slot->render_target.aspect = VK_IMAGE_ASPECT_COLOR_BIT;
}
slot->render_target.sample_count = rtSampleCountToFlags(info->samples);
if (!CreateImageAndView(slot->render_target.extent,
slot->render_target.format,
slot->render_target.sample_count,
slot->render_target.usage,
slot->render_target.aspect,
&slot->render_target.image[i],
&slot->render_target.allocation[i],
&slot->render_target.view[i],
name,
i)) {
slot->render_target.image_count = i;
DestroyRenderTarget(slot);
goto out;
}
slot->render_target.states[i] = RT_RENDER_TARGET_STATE_INVALID;
}
handle.version = slot->version;
handle.index = (uint32_t)(slot - _render_targets);
out:
return handle;
}
void rtDestroyRenderTarget(rt_render_target_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
return;
rtLockWrite(&_lock);
if (_render_targets[handle.index].version == handle.version)
DestroyRenderTarget(&_render_targets[handle.index]);
else
rtLog("VK", "Tried to destroy a render target using an outdated handle.");
rtUnlockWrite(&_lock);
}
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
return NULL;
rtLockRead(&_lock);
rt_render_target *res = NULL;
if (_render_targets[handle.index].version == handle.version)
res = &_render_targets[handle.index].render_target;
else
rtLog("VK", "Tried to access a render target using an outdated handle.");
rtUnlockRead(&_lock);
return res;
}
rt_render_target_handle rtGetSwapchainRenderTarget(void) {
return _swapchain_handle;
}
void rtUpdateSwapchainRenderTarget(void) {
RT_ASSERT(_swapchain_handle.index != 0, "Invalid swap chain render target!");
rt_render_target_slot *slot = &_render_targets[_swapchain_handle.index];
rt_render_target *rt = &slot->render_target;
rt->match_swapchain = 0;
rt->format = g_swapchain.format;
rt->extent = g_swapchain.extent;
rt->sample_count = 1;
rt->usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
rt->aspect = VK_IMAGE_ASPECT_COLOR_BIT;
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
rt->allocation[i] = NULL;
rt->image[i] = g_swapchain.images[i];
rt->view[i] = g_swapchain.image_views[i];
rt->states[i] = RT_RENDER_TARGET_STATE_INVALID;
}
}
void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent) {
rtLockWrite(&_lock);
for (uint32_t i = 1; i < (uint32_t)r_VkMaxRenderTargetCount.i; ++i) {
if (_render_targets[i].render_target.image_count == 0)
continue;
rt_render_target *render_target = &_render_targets[i].render_target;
if (render_target->match_swapchain != 0) {
for (uint32_t j = 0; j < render_target->image_count; ++j) {
vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
render_target->image[j],
render_target->allocation[j]);
}
if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT) != 0) {
render_target->format = format;
}
/* A render target may match both the swapchain format and its size, so check the
 * size flag independently rather than in an else branch. */
if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE) != 0) {
render_target->extent = extent;
}
for (uint32_t j = 0; j < image_count; ++j) {
if (!CreateImageAndView(render_target->extent,
render_target->format,
render_target->sample_count,
render_target->usage,
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to recreate swapchain-matching render target");
break;
}
}
} else if (render_target->image_count < image_count) {
/* Create additional images */
for (uint32_t j = render_target->image_count; j < image_count; ++j) {
if (!CreateImageAndView(render_target->extent,
render_target->format,
render_target->sample_count,
render_target->usage,
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to create additional render target images");
break;
}
}
} else if (render_target->image_count > image_count) {
/* Delete unnecessary images */
for (uint32_t j = image_count; j < render_target->image_count; ++j) {
vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
render_target->image[j],
render_target->allocation[j]);
}
}
render_target->image_count = image_count;
}
rtUnlockWrite(&_lock);
}

View File

@ -1,44 +0,0 @@
#ifndef RT_VK_RENDER_TARGETS_H
#define RT_VK_RENDER_TARGETS_H
#include "gpu.h"
#include "gfx/renderer_api.h"
#include "../common/common_render_graph.h"
/* Must match RT_VK_MAX_SWAPCHAIN_IMAGES */
#define RT_VK_RENDER_TARGET_MAX_IMAGES 3
typedef enum {
RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE = 0x01,
RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT = 0x02,
} rt_render_target_match_swapchain_flags;
typedef struct {
VkImage image[RT_VK_RENDER_TARGET_MAX_IMAGES];
VkImageView view[RT_VK_RENDER_TARGET_MAX_IMAGES];
VmaAllocation allocation[RT_VK_RENDER_TARGET_MAX_IMAGES];
rt_render_target_state states[RT_VK_RENDER_TARGET_MAX_IMAGES];
VkSampleCountFlagBits sample_count;
VkFormat format;
VkExtent2D extent;
VkImageUsageFlagBits usage;
VkImageAspectFlags aspect;
unsigned int image_count;
rt_render_target_match_swapchain_flags match_swapchain;
} rt_render_target;
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info);
void rtDestroyRenderTarget(rt_render_target_handle handle);
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_render_target_handle rtGetSwapchainRenderTarget(void);
/* Update the render target that represents the swap chain */
void rtUpdateSwapchainRenderTarget(void);
/* Update render targets that match the swap chain */
void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent);
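/* Illustrative usage sketch (not part of the original header): requesting a color
 * target that tracks the swap chain, which sets the match_swapchain flags above.
 * The field names follow how render_targets.c reads rt_physical_render_target_info;
 * the "hdr_color" name is made up for this example. */
static inline rt_render_target_handle rt_example_create_swapchain_sized_target(void) {
    rt_physical_render_target_info info = {
        .name = "hdr_color",
        .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN,  /* -> RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE */
        .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
        .format = RT_PIXEL_FORMAT_SWAPCHAIN,       /* -> RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT */
        .samples = 1,
    };
    return rtCreateRenderTarget(&info);
}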
#endif

View File

@ -1,80 +0,0 @@
#ifndef RT_VK_RESOURCES_H
#define RT_VK_RESOURCES_H
/* Buffers and images */
#include "gpu.h"
#include "runtime/threading.h"
typedef enum {
RT_BUFFER_STATE_INVALID,
RT_BUFFER_STATE_NOT_USED,
RT_BUFFER_STATE_IN_USE,
RT_BUFFER_STATE_IN_TRANSFER,
} rt_buffer_state;
typedef struct {
VkBuffer buffer;
VmaAllocation allocation;
size_t size;
rt_buffer_usage usage;
rt_buffer_type type;
rt_buffer_state state;
rt_rwlock lock;
bool mappable;
bool coherent;
rt_gpu_queue owner;
} rt_buffer;
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
/* Helper functions for accessing buffers */
RT_INLINE rt_gpu_queue rtGetBufferOwner(rt_buffer_handle handle) {
rt_buffer *buffer = rtGetBuffer(handle);
rt_gpu_queue owner = RT_VK_UNOWNED;
if (buffer) {
rtLockRead(&buffer->lock);
owner = buffer->owner;
rtUnlockRead(&buffer->lock);
}
return owner;
}
RT_INLINE void rtSetBufferOwner(rt_buffer_handle handle, rt_gpu_queue owner) {
rt_buffer *buffer = rtGetBuffer(handle);
if (buffer) {
rtLockWrite(&buffer->lock);
buffer->owner = owner;
rtUnlockWrite(&buffer->lock);
}
}
RT_INLINE rt_buffer_state rtGetBufferState(rt_buffer_handle handle) {
rt_buffer *buffer = rtGetBuffer(handle);
rt_buffer_state state = RT_BUFFER_STATE_INVALID;
if (buffer) {
rtLockRead(&buffer->lock);
state = buffer->state;
rtUnlockRead(&buffer->lock);
}
return state;
}
RT_INLINE void rtSetBufferState(rt_buffer_handle handle, rt_buffer_state state) {
rt_buffer *buffer = rtGetBuffer(handle);
if (buffer) {
rtLockWrite(&buffer->lock);
buffer->state = state;
rtUnlockWrite(&buffer->lock);
}
}
#endif
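
A hedged usage sketch for the lock-protected helpers above. It assumes the rtUploadToBuffer() declaration from transfers.h further down; UploadVertices is an illustrative name, and the state/owner bookkeeping shown is one plausible convention, not documented behaviour.

#include "resources.h"
#include "transfers.h"

/* Sketch: mark the buffer as in transfer while the staging copy is queued,
 * then record the transfer queue as the new owner on success. */
static rt_result UploadVertices(rt_buffer_handle vbo, const void *data, size_t size) {
    rt_buffer *buffer = rtGetBuffer(vbo);
    if (!buffer)
        return RT_INVALID_VALUE;
    rtSetBufferState(vbo, RT_BUFFER_STATE_IN_TRANSFER);
    rt_result res = rtUploadToBuffer(buffer->buffer,
                                     buffer->allocation,
                                     rtGetBufferOwner(vbo),
                                     data,
                                     size);
    if (res == RT_SUCCESS)
        rtSetBufferOwner(vbo, RT_TRANSFER_QUEUE);
    else
        rtSetBufferState(vbo, RT_BUFFER_STATE_NOT_USED);
    return res;
}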

View File

@ -1,6 +0,0 @@
#include "gpu.h"
extern "C" {
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
}

View File

@ -1,205 +0,0 @@
#define RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
#include "swapchain.h"
#include "gpu.h"
#include "runtime/config.h"
#include <stdlib.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#elif defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif
RT_CVAR_I(r_VkPreferredSwapchainImages,
"Preferred number of swapchain images. [2/3] Default: 2",
2);
RT_CVAR_I(r_VkPreferMailboxMode, "Prefer mailbox present mode over fifo mode. [0/1] Default: 1", 1);
typedef struct {
VkPresentModeKHR present_mode;
VkSurfaceFormatKHR surface_format;
VkExtent2D extent;
VkSurfaceTransformFlagsKHR pre_transform;
} rt_device_swapchain_parameters;
static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
rt_device_swapchain_parameters params;
/* determine presentation mode. FIFO should always be available.
* TODO: If vsync is enabled, we should always choose FIFO.
*/
params.present_mode = VK_PRESENT_MODE_FIFO_KHR;
if (r_VkPreferMailboxMode.i) {
VkPresentModeKHR modes[6];
uint32_t count = 6;
vkGetPhysicalDeviceSurfacePresentModesKHR(g_gpu.phys_device, g_gpu.surface, &count, modes);
for (uint32_t i = 0; i < count; ++i) {
if (modes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
params.present_mode = VK_PRESENT_MODE_MAILBOX_KHR;
}
}
/* Determine surface format */
VkSurfaceFormatKHR formats[64];
uint32_t format_count = 64;
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, NULL);
if (format_count > 64)
    format_count = 64; /* clamp to the size of the local formats array */
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, formats);
params.surface_format = formats[0];
for (uint32_t i = 0; i < format_count; ++i) {
if (formats[i].format == VK_FORMAT_B8G8R8A8_SRGB &&
formats[i].colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
params.surface_format = formats[i];
break;
}
}
/* get extent */
VkSurfaceCapabilitiesKHR capabilities;
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(g_gpu.phys_device, g_gpu.surface, &capabilities);
if (capabilities.currentExtent.width != UINT32_MAX) {
params.extent = capabilities.currentExtent;
} else {
#ifdef _WIN32
RECT client_area;
GetClientRect(g_gpu.native_window.hWnd, &client_area);
params.extent.width = (uint32_t)client_area.right;
params.extent.height = (uint32_t)client_area.bottom;
#else
XWindowAttributes attribs;
XGetWindowAttributes(g_gpu.native_window.display, g_gpu.native_window.window, &attribs);
params.extent.width = (uint32_t)attribs.width;
params.extent.height = (uint32_t)attribs.height;
#endif
}
params.pre_transform = capabilities.currentTransform;
return params;
}
rt_swapchain g_swapchain;
rt_result rtCreateSwapchain(void) {
rt_device_swapchain_parameters device_params = DetermineSwapchainParameters();
uint32_t image_count = r_VkPreferredSwapchainImages.i;
if (image_count < 2)
image_count = 2;
else if (image_count > 3)
image_count = 3;
VkSwapchainCreateInfoKHR swapchain_info = {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.surface = g_gpu.surface,
.presentMode = device_params.present_mode,
.imageFormat = device_params.surface_format.format,
.imageColorSpace = device_params.surface_format.colorSpace,
.imageExtent = device_params.extent,
.preTransform = device_params.pre_transform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.clipped = VK_TRUE,
.minImageCount = image_count,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageArrayLayers = 1,
.oldSwapchain = VK_NULL_HANDLE,
};
uint32_t queue_families[] = {g_gpu.graphics_family, g_gpu.present_family};
if (g_gpu.present_family != g_gpu.graphics_family) {
swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_info.pQueueFamilyIndices = queue_families;
swapchain_info.queueFamilyIndexCount = 2;
} else {
swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
swapchain_info.pQueueFamilyIndices = NULL;
swapchain_info.queueFamilyIndexCount = 0;
}
if (vkCreateSwapchainKHR(g_gpu.device,
&swapchain_info,
g_gpu.alloc_cb,
&g_swapchain.swapchain) != VK_SUCCESS) {
rtReportError("vk", "Failed to create the swapchain");
return 50;
}
g_swapchain.format = device_params.surface_format.format;
g_swapchain.extent = device_params.extent;
/* Retrieve images */
g_swapchain.image_count = 0;
vkGetSwapchainImagesKHR(g_gpu.device, g_swapchain.swapchain, &g_swapchain.image_count, NULL);
if (g_swapchain.image_count > RT_VK_MAX_SWAPCHAIN_IMAGES) {
rtReportError("vk", "Unsupported number of swapchain images: %u", g_swapchain.image_count);
return 51;
}
vkGetSwapchainImagesKHR(g_gpu.device,
g_swapchain.swapchain,
&g_swapchain.image_count,
g_swapchain.images);
/* Create image views */
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
VkImageViewCreateInfo view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = g_swapchain.images[i],
.format = g_swapchain.format,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = 0,
.layerCount = 1,
.baseMipLevel = 0,
.levelCount = 1,
},
};
if (vkCreateImageView(g_gpu.device,
&view_info,
g_gpu.alloc_cb,
&g_swapchain.image_views[i]) != VK_SUCCESS) {
rtReportError("vk", "Failed to create an image view for the swapchain.");
return 52;
}
}
/* Create fences */
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
/* Create as signalled so that we can wait on it the first time we render to that
swapchain image. */
.flags = VK_FENCE_CREATE_SIGNALED_BIT,
};
if (vkCreateFence(g_gpu.device,
&fence_info,
g_gpu.alloc_cb,
&g_swapchain.image_fences[i]) != VK_SUCCESS) {
rtReportError("vk", "Failed to create a fence for the swapchain");
return 53;
}
}
return RT_SUCCESS;
}
rt_result rtRecreateSwapchain(void) {
/* TODO(Kevin): Old swapchain in swapchain create info */
rtDestroySwapchain();
return rtCreateSwapchain();
}
void rtDestroySwapchain(void) {
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
vkDestroyFence(g_gpu.device, g_swapchain.image_fences[i], g_gpu.alloc_cb);
vkDestroyImageView(g_gpu.device, g_swapchain.image_views[i], g_gpu.alloc_cb);
}
vkDestroySwapchainKHR(g_gpu.device, g_swapchain.swapchain, g_gpu.alloc_cb);
}
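
The per-image fences above are created signalled so that the very first wait on them succeeds immediately. A sketch of how they are typically consumed around image acquisition follows; BeginFrame and image_available are illustrative names, not part of the codebase.

/* Sketch: acquire an image, wait for the frame that last rendered to it,
 * then re-arm its fence for the submit that will render to it now. */
static uint32_t BeginFrame(VkSemaphore image_available) {
    uint32_t image_index = 0;
    vkAcquireNextImageKHR(g_gpu.device,
                          g_swapchain.swapchain,
                          UINT64_MAX,
                          image_available,
                          VK_NULL_HANDLE,
                          &image_index);
    vkWaitForFences(g_gpu.device, 1, &g_swapchain.image_fences[image_index], VK_TRUE, UINT64_MAX);
    vkResetFences(g_gpu.device, 1, &g_swapchain.image_fences[image_index]);
    return image_index;
}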

View File

@ -1,30 +0,0 @@
#ifndef RT_VK_SWAPCHAIN_H
#define RT_VK_SWAPCHAIN_H
#include <volk/volk.h>
#include "runtime/runtime.h"
#define RT_VK_MAX_SWAPCHAIN_IMAGES 3
typedef struct {
VkSwapchainKHR swapchain;
VkImage images[RT_VK_MAX_SWAPCHAIN_IMAGES];
VkImageView image_views[RT_VK_MAX_SWAPCHAIN_IMAGES];
VkFence image_fences[RT_VK_MAX_SWAPCHAIN_IMAGES];
uint32_t image_count;
VkFormat format;
VkExtent2D extent;
} rt_swapchain;
#ifndef RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
extern rt_swapchain g_swapchain;
#endif
rt_result rtCreateSwapchain(void);
rt_result rtRecreateSwapchain(void);
void rtDestroySwapchain(void);
#endif

View File

@ -1,263 +0,0 @@
#include "transfers.h"
#include "command_buffers.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <stdbool.h>
RT_CVAR_I(rt_VkTransferSlotCount,
"Number of available transfer slots per frame. Default: 512",
512);
/* This is a temporary solution. We probably should keep a pool of buffers
* to avoid re-creating the buffers all the time. */
typedef struct {
VkBuffer buffer;
VmaAllocation allocation;
bool requires_flush;
} rt_transfer_buffer;
typedef struct {
rt_transfer_buffer tbuf;
VkFence fence;
VkSemaphore ownership_transfer;
} rt_transfer;
static rt_transfer *_transfers;
static uint32_t _transfer_count;
static rt_mutex *_transfer_lock;
static rt_transfer_buffer AcquireTransferBuffer(size_t size) {
rt_transfer_buffer tbuf = {VK_NULL_HANDLE};
VkBufferCreateInfo buffer_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO,
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
};
if (vmaCreateBuffer(g_gpu.allocator,
&buffer_info,
&alloc_info,
&tbuf.buffer,
&tbuf.allocation,
NULL) == VK_SUCCESS) {
VkMemoryPropertyFlags props;
vmaGetAllocationMemoryProperties(g_gpu.allocator, tbuf.allocation, &props);
tbuf.requires_flush = (props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0;
}
return tbuf;
}
static void ReturnTransferBuffer(rt_transfer_buffer buffer) {
vmaDestroyBuffer(g_gpu.allocator, buffer.buffer, buffer.allocation);
}
static void CopyToTransferBuffer(rt_transfer_buffer buffer, const void *data, size_t n) {
void *tmem = NULL;
vmaMapMemory(g_gpu.allocator, buffer.allocation, &tmem);
RT_ASSERT(tmem, "Transfer Buffer memory must be mappable.");
memcpy(tmem, data, n);
vmaUnmapMemory(g_gpu.allocator, buffer.allocation);
if (buffer.requires_flush)
vmaFlushAllocation(g_gpu.allocator, buffer.allocation, 0, n);
}
rt_result InitializeTransfers(void) {
_transfer_lock = rtCreateMutex();
if (!_transfer_lock)
return RT_UNKNOWN_ERROR;
_transfers = calloc((size_t)rt_VkTransferSlotCount.i, sizeof(rt_transfer));
if (!_transfers) {
rtDestroyMutex(_transfer_lock);
return RT_OUT_OF_MEMORY;
}
_transfer_count = 0;
return RT_SUCCESS;
}
void ShutdownTransfers(void) {
rtDestroyMutex(_transfer_lock);
for (int i = 0; i < rt_VkTransferSlotCount.i; ++i) {
    if (_transfers[i].fence)
        vkDestroyFence(g_gpu.device, _transfers[i].fence, g_gpu.alloc_cb);
    if (_transfers[i].ownership_transfer)
        vkDestroySemaphore(g_gpu.device, _transfers[i].ownership_transfer, g_gpu.alloc_cb);
}
free(_transfers);
}
#define TRANSFER_FAILED -1
#define TRANSFER_NOT_NEEDED 0
#define TRANSFER_STARTED 1
static int AcquireBufferOwnership(rt_transfer *transfer,
VkBuffer buffer,
rt_gpu_queue current_owner,
VkCommandBuffer transfer_cmd) {
if (!transfer->ownership_transfer) {
VkSemaphoreCreateInfo sem_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
if (vkCreateSemaphore(g_gpu.device,
&sem_info,
g_gpu.alloc_cb,
&transfer->ownership_transfer) != VK_SUCCESS) {
rtReportError("vk", "Failed to create an ownership transfer semaphore.");
return TRANSFER_FAILED;
}
}
uint32_t src_family = rtGetQueueFamily(current_owner);
uint32_t dst_family = rtGetQueueFamily(RT_TRANSFER_QUEUE);
if (src_family == dst_family)
return TRANSFER_NOT_NEEDED;
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(current_owner);
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
VkBufferMemoryBarrier2 release_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
.buffer = buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
.srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.srcQueueFamilyIndex = src_family,
.dstQueueFamilyIndex = dst_family,
};
VkDependencyInfo dep = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = &release_barrier,
.bufferMemoryBarrierCount = 1};
vkCmdPipelineBarrier2(cmd, &dep);
vkEndCommandBuffer(cmd);
VkBufferMemoryBarrier2 acquire_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
.buffer = buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.srcQueueFamilyIndex = src_family,
.dstQueueFamilyIndex = dst_family,
};
VkDependencyInfo dep2 = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = &acquire_barrier,
.bufferMemoryBarrierCount = 1};
vkCmdPipelineBarrier2(transfer_cmd, &dep2);
/* Only transfer the ownership when the frame is finished */
VkSemaphore wait_semaphore = VK_NULL_HANDLE;
rt_frame_data *frame = rtGetFrameData(g_gpu.current_frame_id);
wait_semaphore = frame->render_finished;
uint32_t dummy = 0;
if (rtSubmitSingleCommandBuffer(cmd,
&wait_semaphore,
&dummy,
1,
&transfer->ownership_transfer,
&dummy,
1,
current_owner,
VK_NULL_HANDLE) != RT_SUCCESS)
return TRANSFER_FAILED;
return TRANSFER_STARTED;
}
rt_result rtUploadToBuffer(VkBuffer buffer,
VmaAllocation allocation,
rt_gpu_queue current_owner,
const void *data,
size_t nbytes) {
rtLockMutex(_transfer_lock);
rt_transfer *transfer =
(int)_transfer_count < rt_VkTransferSlotCount.i ? &_transfers[_transfer_count++] : NULL;
rtUnlockMutex(_transfer_lock);
if (!transfer)
return RT_NO_TRANSFER_SLOTS;
transfer->tbuf = AcquireTransferBuffer(nbytes);
if (!transfer->tbuf.buffer) {
return RT_OUT_OF_MEMORY;
}
CopyToTransferBuffer(transfer->tbuf, data, nbytes);
if (!transfer->fence) {
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
};
if (vkCreateFence(g_gpu.device, &fence_info, g_gpu.alloc_cb, &transfer->fence) !=
VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
}
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
bool requires_ownership_transfer =
(current_owner != RT_TRANSFER_QUEUE && current_owner != RT_VK_UNOWNED);
if (requires_ownership_transfer) {
int did_transfer = AcquireBufferOwnership(transfer, buffer, current_owner, cmd);
if (did_transfer == TRANSFER_FAILED)
return RT_UNKNOWN_ERROR;
else if (did_transfer == TRANSFER_NOT_NEEDED)
requires_ownership_transfer = false;
}
VkBufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = nbytes};
vkCmdCopyBuffer(cmd, transfer->tbuf.buffer, buffer, 1, &region);
vkEndCommandBuffer(cmd);
uint32_t dummy = 0;
return rtSubmitSingleCommandBuffer(cmd,
requires_ownership_transfer ? &transfer->ownership_transfer
: NULL,
requires_ownership_transfer ? &dummy : NULL,
requires_ownership_transfer ? 1 : 0,
NULL,
NULL,
0,
RT_TRANSFER_QUEUE,
transfer->fence);
}
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void) {
if (_transfer_count == 0)
return;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
RT_ASSERT(temp.arena, "Could not get a temporary arena for flushing gpu transfers.");
rtLockMutex(_transfer_lock);
VkFence *fences = RT_ARENA_PUSH_ARRAY(temp.arena, VkFence, _transfer_count);
if (!fences) {
rtReportError("vk", "Failed to allocate fences array for flushing gpu transfers.");
rtUnlockMutex(_transfer_lock);
return;
}
uint32_t count = 0;
for (uint32_t i = 0; i < _transfer_count; ++i) {
if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
continue;
fences[count++] = _transfers[i].fence;
}
vkWaitForFences(g_gpu.device, count, fences, VK_TRUE, UINT64_MAX);
for (uint32_t i = 0; i < _transfer_count; ++i) {
if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
continue;
ReturnTransferBuffer(_transfers[i].tbuf);
}
_transfer_count = 0;
rtUnlockMutex(_transfer_lock);
}
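
The "pool of buffers" idea mentioned in the comment near the top of this file could look roughly like the sketch below. Every name here except rt_transfer_buffer and AcquireTransferBuffer is illustrative and does not exist in the codebase; this is one possible layout, not the author's design.

#define RT_TRANSFER_POOL_SIZE 64

typedef struct {
    rt_transfer_buffer buffer;
    size_t capacity;
    bool in_use;
} rt_pooled_transfer_buffer;

static rt_pooled_transfer_buffer _buffer_pool[RT_TRANSFER_POOL_SIZE];

/* Sketch: reuse the smallest free pooled buffer that is large enough instead of
 * creating and destroying a staging buffer for every upload. */
static rt_transfer_buffer AcquirePooledTransferBuffer(size_t size) {
    int best = -1;
    for (int i = 0; i < RT_TRANSFER_POOL_SIZE; ++i) {
        if (_buffer_pool[i].in_use || _buffer_pool[i].capacity < size)
            continue;
        if (best < 0 || _buffer_pool[i].capacity < _buffer_pool[best].capacity)
            best = i;
    }
    if (best >= 0) {
        _buffer_pool[best].in_use = true;
        return _buffer_pool[best].buffer;
    }
    /* No suitable pooled buffer; fall back to a fresh allocation. */
    return AcquireTransferBuffer(size);
}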

View File

@ -1,16 +0,0 @@
#ifndef RT_VK_TRANSFERS_H
#define RT_VK_TRANSFERS_H
#include "gpu.h"
#include "runtime/runtime.h"
enum {
RT_NO_TRANSFER_SLOTS = RT_CUSTOM_ERROR_START,
};
rt_result rtUploadToBuffer(VkBuffer buffer, VmaAllocation allocation, rt_gpu_queue current_owner, const void *data, size_t nbytes);
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void);
#endif
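
A short usage sketch for this interface. UploadAndWait is an illustrative name, and treating slot exhaustion by flushing and retrying once is an assumption about how the API is meant to be used, not documented behaviour.

/* Sketch: queue a staging copy and block until it has completed. */
static rt_result UploadAndWait(VkBuffer dst, VmaAllocation alloc, const void *data, size_t n) {
    rt_result res = rtUploadToBuffer(dst, alloc, RT_VK_UNOWNED, data, n);
    if (res == RT_NO_TRANSFER_SLOTS) {
        /* All transfer slots for this frame are taken: drain them and retry once. */
        rtFlushGPUTransfers();
        res = rtUploadToBuffer(dst, alloc, RT_VK_UNOWNED, data, n);
    }
    if (res == RT_SUCCESS)
        rtFlushGPUTransfers();
    return res;
}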

View File

@ -1,21 +0,0 @@
#ifdef _MSC_VER
#pragma warning(push, 0)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wmissing-braces"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#define VMA_IMPLEMENTATION
#include <vma/vk_mem_alloc.h>
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif