Rip out renderer code

THIS WILL NOT COMPILE

parent 6b830f3ff2
commit b0e6839a1c
src/gfx/builtin_objects.c
@@ -1,10 +0,0 @@
#define RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
#include "builtin_objects.h"

rt_builtin_render_object_types g_builtin_render_object_types;

RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void) {
    g_builtin_render_object_types.render_mesh =
        rtRegisterRenderObjectType(sizeof(rt_render_mesh), "render_mesh");
    return RT_SUCCESS;
}
src/gfx/builtin_objects.h
@@ -1,40 +0,0 @@
#ifndef RT_GFX_BUILTIN_OBJECTS_H
#define RT_GFX_BUILTIN_OBJECTS_H

/* Render Object types used by the builtin graphics passes.
 *
 * As a user you are free to not use these, but then you
 * also cannot use the builtin render passes. */

#include "renderer_api.h"
#include "render_list.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
    rt_pipeline_handle pipeline;
    rt_buffer_handle vbo;
    rt_buffer_handle ibo;
    uint32_t vertex_count;
    uint32_t index_count;
} rt_render_mesh;

typedef struct {
    rt_render_object_type render_mesh;
} rt_builtin_render_object_types;

#ifndef RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
extern RT_DLLIMPORT rt_builtin_render_object_types g_builtin_render_object_types;
#endif

RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void);

#ifdef __cplusplus
}
#endif

#endif
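For anyone reviving this code, a minimal usage sketch of the removed builtin-objects API, assuming rtInitGFX has already run; `frame_id` is a placeholder for the caller's current frame counter:

/* Sketch: register the builtin types once at startup, then use the stored
 * type id to create per-frame render lists. */
rtRegisterBuiltinRenderObjectTypes();

rt_create_render_list_result meshes =
    rtCreateRenderList(g_builtin_render_object_types.render_mesh, frame_id);
if (meshes.ok) {
    rt_render_mesh mesh = {0}; /* pipeline/vbo/ibo/counts would come from loaded assets */
    rtPushRenderListEntry(&meshes.list, &mesh);
}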
src/gfx/effect.c (260 lines removed)
@@ -1,260 +0,0 @@
#include "effect.h"

#include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/handles.h"
#include "runtime/hashing.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include "runtime/atomics.h"

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

RT_CVAR_SZ(rt_EffectCacheSize, "The number of slots in the effect cache. Default: 1024", 1024);

typedef struct {
    rt_resource_id resource;
    rt_effect effect;
    _Alignas(4) unsigned int refcount;
} rt_effect_cache_slot;

/* We use a hashtable to find previously loaded effects.
 * To reclaim unreferenced slots when we need to, we use a minheap.
 * The minheap implements an LRU list. To track usage, we use a global running
 * "usage counter", incremented whenever an effect is loaded.
 */
typedef struct {
    rt_effect_cache_slot *slots;
    rt_hashtable lut;
    rt_minheap reclaim_heap;

    /* Linearly allocate slots until we reach capacity */
    size_t next_free;

    /* Used to track "time" since an effect was loaded */
    _Alignas(4) int usage_counter;

    void *memory;

    rt_rwlock lock;
} rt_effect_cache;

static rt_effect_cache _cache;

rt_result InitEffectCache(void) {
    if (!RT_IS_POWER_OF_TWO(rt_EffectCacheSize.sz)) {
        rtReportError(
            "GFX",
            "The value of \"rt_EffectCacheSize\" must be a power of two.\nConfigured: %zu.",
            rt_EffectCacheSize.sz);
        return RT_INVALID_VALUE;
    }

    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok)
        return RT_UNKNOWN_ERROR;
    _cache.lock = lock_res.lock;

    size_t mem_required = sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz +
                          RT_HASH_TABLE_MEMORY_REQUIRED(
                              2 * rt_EffectCacheSize.sz) +          /* double to keep performance up */
                          sizeof(int) * rt_EffectCacheSize.sz +     /* heap keys */
                          sizeof(size_t) * rt_EffectCacheSize.sz;   /* heap values */
    _cache.memory = malloc(mem_required);
    if (!_cache.memory) {
        rtDestroyRWLock(&_cache.lock);
        return RT_OUT_OF_MEMORY;
    }
    _cache.lut = rtCreateHashtable(rt_EffectCacheSize.sz, _cache.memory, NULL, NULL);

    int *keys =
        (int *)((char *)_cache.memory + RT_HASH_TABLE_MEMORY_REQUIRED(2 * rt_EffectCacheSize.sz));
    size_t *values = (size_t *)(keys + rt_EffectCacheSize.sz);
    _cache.reclaim_heap = rtCreateMinheap(keys, values, sizeof(size_t), rt_EffectCacheSize.sz, 0);
    _cache.usage_counter = 0;

    _cache.slots = (rt_effect_cache_slot *)(values + rt_EffectCacheSize.sz);
    memset(_cache.slots, 0, sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz);

    return RT_SUCCESS;
}

void ShutdownEffectCache(void) {
    free(_cache.memory);
}

RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
    uint32_t id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}

RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
    uint32_t id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}

static void ReleaseEffect(rt_effect *effect) {
    for (unsigned int i = 0; i < effect->pass_count; ++i) {
        g_renderer.DestroyPipeline(effect->passes[i].pipeline);
    }
}

/* Returns the index of the reserved slot */
static size_t ReserveSlot(rt_resource_id id) {
    if (_cache.next_free < rt_EffectCacheSize.sz) {
        size_t slot = _cache.next_free++;
        RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
        _cache.slots[slot].refcount = 1;

        if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
            rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
            _cache.slots[slot].refcount = 0;
            rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
            return SIZE_MAX;
        }
        _cache.slots[slot].resource = id;
        return slot;
    } else if (!rtMinheapIsEmpty(&_cache.reclaim_heap)) {
        size_t slot;
        rtMinheapPop(&_cache.reclaim_heap, &slot);
        RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
        _cache.slots[slot].refcount = 1;

        rt_resource_id old_id = _cache.slots[slot].resource;
        RT_ASSERT(old_id != RT_INVALID_RESOURCE_ID, "The slot should contain an old effect.");
        ReleaseEffect(&_cache.slots[slot].effect);
        rtHashtableRemove(&_cache.lut, old_id);

        if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
            rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
            _cache.slots[slot].refcount = 0;
            rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
            return SIZE_MAX;
        }
        _cache.slots[slot].resource = id;
        return slot;
    } else {
        rtLog("GFX",
              "Could not insert effect %x into the cache, because the effect cache is full.",
              id);
        return SIZE_MAX;
    }
}

/* Load resource to memory allocated on the given arena */
static rt_result LoadResource(rt_resource_id id, void **p_out, rt_arena *arena) {
    size_t size = rtGetResourceSize(id);
    if (!size) {
        rtLog("GFX", "ID %x is not a valid resource.", id);
        return RT_INVALID_VALUE;
    }
    void *dst = rtArenaPush(arena, size);
    if (!dst) {
        rtLog("GFX", "Failed to allocate %zu bytes of temporary storage.", size);
        return RT_OUT_OF_MEMORY;
    }
    *p_out = dst;
    return rtGetResource(id, dst);
}

static rt_result LoadEffect(rt_resource_id id, rt_effect *effect) {
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        rtLog("GFX", "Could not get a temporary arena.");
        return RT_OUT_OF_MEMORY;
    }
    const rt_resource *resource = NULL;
    rt_result res = LoadResource(id, (void **)&resource, temp.arena);
    if (res != RT_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return res;
    }
    if (resource->type != RT_RESOURCE_EFFECT) {
        rtReturnTemporaryArena(temp);
        rtLog("GFX", "Resource %x does not refer to an effect resource.", id);
        return RT_INVALID_VALUE;
    }
    const rt_effect_info *effect_info = resource->data;
    effect->pass_count = effect_info->pass_count;

    for (unsigned int i = 0; i < effect_info->pass_count; ++i) {
        rt_resource *pipeline_resource = NULL;
        res = LoadResource(effect_info->passes[i].pipeline, (void **)&pipeline_resource, temp.arena);
        if (res != RT_SUCCESS) {
            rtReturnTemporaryArena(temp);
            return res;
        }
        if (pipeline_resource->type != RT_RESOURCE_PIPELINE) {
            rtReturnTemporaryArena(temp);
            rtLog("GFX", "Resource %x does not refer to a pipeline resource.", id);
            return RT_INVALID_VALUE;
        }
        rt_pipeline_info *pipeline_info = pipeline_resource->data;
        rt_pipeline_handle pipeline = g_renderer.CompilePipeline(pipeline_info);
        if (!RT_IS_HANDLE_VALID(pipeline)) {
            rtReturnTemporaryArena(temp);
            rtLog("GFX",
                  "Failed to compile the pipeline of pass %d (%x).",
                  i,
                  effect_info->passes[i].pass_id);
            return RT_UNKNOWN_ERROR;
        }
        effect->passes[i].pass_id = effect_info->passes[i].pass_id;
        effect->passes[i].pipeline = pipeline;
    }

    rtReturnTemporaryArena(temp);

    return RT_SUCCESS;
}

RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect) {
    rtAtomic32Inc(&_cache.usage_counter);

    /* Check if the effect is already loaded */
    rtLockRead(&_cache.lock);
    uint64_t slot = rtHashtableLookup(&_cache.lut, id, UINT64_MAX);
    if (slot != UINT64_MAX) {
        RT_ASSERT(_cache.slots[slot].resource == id, "Got the wrong effect");
        rtAtomic32Inc(&_cache.slots[slot].refcount);
        *effect = &_cache.slots[slot].effect;
        rtUnlockRead(&_cache.lock);
        return RT_SUCCESS;
    }
    rtUnlockRead(&_cache.lock);

    /* Load the effect */
    rtLockWrite(&_cache.lock);
    if (rtHashtableLookup(&_cache.lut, id, UINT64_MAX) != UINT64_MAX) {
        /* Another thread was faster than us, just retry */
        rtUnlockWrite(&_cache.lock);
        return rtLoadEffect(id, effect);
    }
    slot = ReserveSlot(id);
    if (slot == SIZE_MAX) {
        rtUnlockWrite(&_cache.lock);
        return RT_OUT_OF_MEMORY;
    }
    rt_result res = LoadEffect(id, &_cache.slots[slot].effect);
    rtUnlockWrite(&_cache.lock);
    *effect = &_cache.slots[slot].effect;
    return res;
}

RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect) {
    RT_VERIFY(effect);
    rt_effect_cache_slot *slot =
        (rt_effect_cache_slot *)((char *)effect - offsetof(rt_effect_cache_slot, effect));
    if (rtAtomic32Dec(&slot->refcount) == 0) {
        rtLockWrite(&_cache.lock);
        size_t slot_index = (size_t)(slot - _cache.slots);
        rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot_index);
        rtUnlockWrite(&_cache.lock);
    }
}
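The cache is reference counted: every successful rtLoadEffect must be paired with an rtReleaseEffect, and a slot only becomes reclaimable by the LRU minheap once its refcount reaches zero. A minimal usage sketch of the removed API, assuming `effect_id` was obtained from the resource system:

extern rt_resource_id effect_id; /* assumed: comes from the resource system */

const rt_effect *effect = NULL;
if (rtLoadEffect(effect_id, &effect) == RT_SUCCESS) {
    for (uint32_t i = 0; i < effect->pass_count; ++i) {
        /* look up effect->passes[i].pass_id in the framegraph and
         * bind effect->passes[i].pipeline for that pass */
    }
    rtReleaseEffect(effect); /* drops the refcount; slot is reclaimable at 0 */
}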
src/gfx/effect.h
@@ -1,60 +0,0 @@
#ifndef RT_GFX_EFFECT_H
#define RT_GFX_EFFECT_H

/* An effect lists the passes during which an object needs to be rendered
 * and a pipeline for each pass.
 * The effect also defines the required vertex layout per pass.
 */

#include "gfx.h"
#include "renderer_api.h"
#include "runtime/resources.h"

/* *** Resource types *** */

typedef struct rt_pipeline_info_s {
    rt_resource_id vertex_shader;
    rt_resource_id fragment_shader;
    rt_resource_id compute_shader;

    /* TODO(Kevin): Fixed function settings */
} rt_pipeline_info;

typedef struct {
    /* Id of the render pass during which this effect pass is run. */
    uint32_t pass_id;
    rt_resource_id pipeline;
} rt_effect_pass_info;

typedef struct {
    uint32_t pass_count;
    rt_effect_pass_info passes[RT_MAX_SUBRESOURCES];
} rt_effect_info;

/* *** Runtime types *** */

typedef struct {
    uint32_t pass_id;
    rt_pipeline_handle pipeline;
} rt_effect_pass;

typedef struct {
    uint32_t pass_count;
    rt_effect_pass passes[RT_MAX_SUBRESOURCES];
} rt_effect;

RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len);
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len);

/* Load an effect from a resource file.
 * Returns:
 * - RT_SUCCESS
 * - RT_OUT_OF_MEMORY, if temporary memory allocations failed
 * - RT_INVALID_VALUE, if id does not refer to an effect resource.
 * - RT_UNKNOWN_ERROR, if a pipeline failed to compile
 * - errors returned by rtGetResource() */
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect);

RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect);

#endif
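A sketch of what an rt_effect_info for a hypothetical two-pass effect could look like; the pass names and the `depth_pipeline_id`/`forward_pipeline_id` resource ids are placeholders, but the id derivation via rtCalculateRenderPassID matches how effect.c computes pass ids:

extern rt_resource_id depth_pipeline_id;   /* placeholder pipeline resources */
extern rt_resource_id forward_pipeline_id;

rt_effect_info info = {
    .pass_count = 2,
    .passes = {
        {.pass_id = rtCalculateRenderPassID("depth_prepass", 13), .pipeline = depth_pipeline_id},
        {.pass_id = rtCalculateRenderPassID("forward", 7), .pipeline = forward_pipeline_id},
    },
};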
src/gfx/gfx.h
@@ -1,88 +0,0 @@
#ifndef RT_GFX_H
#define RT_GFX_H

/* Graphics system. This is the interface of the rendering code.
 *
 * We need (at least) three different renderers:
 * - world cell renderer (for world & dungeon environments)
 * - character renderer (for animated models)
 * - object renderer (for static models)
 */

#include <stdint.h>

#include "runtime/runtime.h"

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4201) /* anonymous struct */
#endif
typedef union {
    float v[4];
    struct {
        float r;
        float g;
        float b;
        float a;
    };
} rt_color;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif

/* NOTE(kevin): When you add a value here, you need to handle it in
 * framegraph_processor.c : ParseFramegraph
 * and in the render target and texture functions of all renderers. */
typedef enum {
    RT_PIXEL_FORMAT_INVALID,

    RT_PIXEL_FORMAT_R8G8B8A8_UNORM,
    RT_PIXEL_FORMAT_B8G8R8A8_UNORM,
    RT_PIXEL_FORMAT_R8G8B8A8_SRGB,
    RT_PIXEL_FORMAT_B8G8R8A8_SRGB,
    RT_PIXEL_FORMAT_R8G8B8_UNORM,
    RT_PIXEL_FORMAT_B8G8R8_UNORM,
    RT_PIXEL_FORMAT_R8G8B8_SRGB,
    RT_PIXEL_FORMAT_B8G8R8_SRGB,

    RT_PIXEL_FORMAT_DEPTH24_STENCIL8,
    RT_PIXEL_FORMAT_DEPTH32,

    /* Special value indicating whichever format the swapchain uses */
    RT_PIXEL_FORMAT_SWAPCHAIN,

    RT_PIXEL_FORMAT_count,
} rt_pixel_format;

RT_INLINE int rtIsDepthFormat(rt_pixel_format format) {
    return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 || format == RT_PIXEL_FORMAT_DEPTH32;
}

/* In renderer_api.h -> Not necessary for almost all gfx usage */
typedef struct rt_renderer_init_info_s rt_renderer_init_info;

RT_DLLEXPORT void rtRegisterRendererCVars(void);

RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info);

RT_DLLEXPORT void rtShutdownGFX(void);

RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);

RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id);

#ifdef __cplusplus
}
#endif

#endif
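For reference, a sketch of the intended top-level flow through this interface; `init_info` and the `running` flag are placeholders supplied by the host application:

extern rt_renderer_init_info init_info; /* assumed: filled with platform window handles */
extern bool running;                    /* assumed: the application's main-loop flag */

rtRegisterRendererCVars();
if (rtInitGFX(&init_info) == RT_SUCCESS) {
    for (unsigned int frame_id = 1; running; ++frame_id) {
        rtBeginGFXFrame(frame_id);
        /* build and submit render views here */
        rtEndGFXFrame(frame_id);
    }
    rtShutdownGFX();
}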
src/gfx/gfx_main.c
@@ -1,224 +0,0 @@
#include <stdbool.h>
#include <string.h>

#define RT_DONT_DEFINE_RENDERER_GLOBAL
#include "gfx.h"
#include "renderer_api.h"

#include "runtime/config.h"
#include "runtime/dynamic_libs.h"

/* Attributes are used to bind buffers (or textures) to symbolic values.
 * For example, an attribute might be bound to "CELL_GRID", which would be
 * replaced with the (at the time of the invocation) grid buffer of the
 * current world cell.
 */

rt_renderer_api g_renderer;

#ifndef RT_STATIC_LIB
static rt_dynlib _renderer_lib;
#endif
static bool _renderer_loaded = false;

RT_DLLEXPORT
RT_CVAR_S(rt_Renderer,
          "Select the render backend. Available options: [vk, dx11, null], Default: vk",
          "dx11");

extern rt_cvar rt_RenderViewArenaSize;
extern rt_cvar rt_RenderListPoolSize;

#ifdef RT_STATIC_LIB
extern void RT_RENDERER_API_FN(RegisterCVars)(void);
extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
extern void RT_RENDERER_API_FN(Shutdown)(void);
extern unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void);
extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
extern void RT_RENDERER_API_FN(EndFrame)(unsigned int);
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
                                                         const rt_alloc_command_buffer_info *,
                                                         rt_command_buffer_handle *);
extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
                                                          const rt_submit_command_buffers_info *);
extern rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t,
                                                      const rt_gpu_semaphore_info *,
                                                      rt_gpu_semaphore_handle *);
extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *);
extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void);
extern rt_result
RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);

extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *, unsigned int);
extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                                 uint32_t pass_id,
                                                 rt_render_view view,
                                                 unsigned int frame_id);
extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph, unsigned int frame_id);

extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
                                             const rt_cmd_begin_pass_info *);
extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle,
                                                          rt_render_target_handle,
                                                          rt_render_target_state);
extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle,
                                                          rt_render_target_handle);
extern void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle, rt_pipeline_handle);
extern void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle,
                                                     uint32_t,
                                                     uint32_t,
                                                     const rt_buffer_handle *,
                                                     const uint32_t *,
                                                     const uint32_t *);
extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint32_t);
#endif

extern rt_result InitRenderLists(void);
extern void ShutdownRenderLists(void);
extern void ResetRenderLists(unsigned int frame_id);
extern rt_result InitRenderViews(void);
extern void ShutdownRenderViews(void);
extern void ResetRenderViews(unsigned int frame_id);
extern rt_result InitEffectCache(void);
extern void ShutdownEffectCache(void);

static bool LoadRenderer(void) {
#if !defined(RT_STATIC_LIB)
#define RETRIEVE_SYMBOL(name, type)                                          \
    g_renderer.name = (type *)rtGetSymbol(_renderer_lib, "rtRen" #name);     \
    if (!g_renderer.name) {                                                  \
        rtReportError("GFX",                                                 \
                      "Unable to retrieve renderer function %s from backend %s", \
                      #name,                                                 \
                      rt_Renderer.s);                                        \
    }

    if (strcmp(rt_Renderer.s, "vk") == 0) {
        _renderer_lib = rtOpenLib(RT_DLLNAME("rtvk"));
        if (!_renderer_lib) {
            rtReportError("GFX", "Unable to load renderer backend: %s", RT_DLLNAME("rtvk"));
            return false;
        }
        RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
        RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
        RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
        RETRIEVE_SYMBOL(GetMaxFramesInFlight, rt_get_max_frames_in_flight_fn);
        RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
        RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn);
        RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
        RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
        RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn);
        RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn);
        RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
        RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
        RETRIEVE_SYMBOL(CreateRenderGraphBuilder, rt_create_render_graph_builder_fn);
        RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
        RETRIEVE_SYMBOL(ExecuteRenderGraph, rt_execute_render_graph_fn);
        RETRIEVE_SYMBOL(SubmitRenderView, rt_submit_render_view_fn);
        RETRIEVE_SYMBOL(ResetRenderGraph, rt_reset_render_graph_fn);
        RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
        RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
        RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
        RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn);
        RETRIEVE_SYMBOL(CmdBindPipeline, rt_cmd_bind_pipeline_fn);
        RETRIEVE_SYMBOL(CmdBindVertexBuffers, rt_cmd_bind_vertex_buffers_fn);
        RETRIEVE_SYMBOL(CmdDraw, rt_cmd_draw_fn);
    } else {
        rtReportError("GFX",
                      "Unsupported renderer backend: (%s) %s",
                      rt_Renderer.name,
                      rt_Renderer.s);
        return false;
    }
#undef RETRIEVE_SYMBOL
#else
    g_renderer.RegisterCVars = &rtRenRegisterCVars;
    g_renderer.Init = &rtRenInit;
    g_renderer.Shutdown = &rtRenShutdown;
    g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight;
    g_renderer.BeginFrame = &rtRenBeginFrame;
    g_renderer.EndFrame = &rtRenEndFrame;
    g_renderer.CompilePipeline = &rtRenCompilePipeline;
    g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
    g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers;
    g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers;
    g_renderer.CreateBuffers = &rtRenCreateBuffers;
    g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
    g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
    g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
    g_renderer.ExecuteRenderGraph = &rtRenExecuteRenderGraph;
    g_renderer.SubmitRenderView = &rtRenSubmitRenderView;
    g_renderer.ResetRenderGraph = &rtRenResetRenderGraph;
    g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
    g_renderer.CmdEndPass = &rtRenCmdEndPass;
    g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
    g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite;
    g_renderer.CmdBindPipeline = &rtRenCmdBindPipeline;
    g_renderer.CmdBindVertexBuffers = &rtRenCmdBindVertexBuffers;
    g_renderer.CmdDraw = &rtRenCmdDraw;
#endif
    return true;
}

RT_DLLEXPORT void rtRegisterRendererCVars(void) {
    if (!_renderer_loaded) {
        if (!LoadRenderer())
            return;
        _renderer_loaded = true;
    }
    g_renderer.RegisterCVars();
}

RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
    rtRegisterCVAR(&rt_Renderer);
    rtRegisterCVAR(&rt_RenderViewArenaSize);
    rtRegisterCVAR(&rt_RenderListPoolSize);

    if (!_renderer_loaded) {
        if (!LoadRenderer())
            return RT_UNKNOWN_ERROR;
        _renderer_loaded = true;
        g_renderer.RegisterCVars();
    }

    rt_result result;

    if ((result = g_renderer.Init(renderer_info)) != RT_SUCCESS)
        return result;

    if ((result = InitRenderLists()) != RT_SUCCESS)
        return result;

    if ((result = InitRenderViews()) != RT_SUCCESS)
        return result;

    if ((result = InitEffectCache()) != RT_SUCCESS)
        return result;

    return result;
}

RT_DLLEXPORT void rtShutdownGFX(void) {
    ShutdownEffectCache();
    ShutdownRenderViews();
    ShutdownRenderLists();
    g_renderer.Shutdown();
}

RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
    g_renderer.BeginFrame(frame_id);
}

RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) {
    g_renderer.EndFrame(frame_id);
    ResetRenderLists(frame_id);
    ResetRenderViews(frame_id);
}
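To make the dynamic-loading path concrete: for a single entry, RETRIEVE_SYMBOL expands to roughly the following (the backend exports its functions under the "rtRen" prefix, e.g. rtRenInit):

/* RETRIEVE_SYMBOL(Init, rt_init_renderer_fn) expands to roughly: */
g_renderer.Init = (rt_init_renderer_fn *)rtGetSymbol(_renderer_lib, "rtRenInit");
if (!g_renderer.Init) {
    rtReportError("GFX",
                  "Unable to retrieve renderer function %s from backend %s",
                  "Init",
                  rt_Renderer.s);
}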
src/gfx/meson.build
@@ -1,25 +0,0 @@
gfx_deps = [thread_dep, m_dep]
gfx_lib = library('rtgfx',
    # Project Sources
    'builtin_objects.h',
    'effect.h',
    'gfx.h',
    'renderer_api.h',
    'render_list.h',
    'render_view.h',

    'builtin_objects.c',
    'effect.c',
    'gfx_main.c',
    'render_list.c',
    'render_view.c',

    # Contrib Sources
    dependencies : gfx_deps,
    include_directories : engine_incdir,
    link_with : runtime_lib,
    c_pch : 'pch/gfx_pch.h',
    install : true)

engine_libs += gfx_lib
engine_lib_paths += gfx_lib.full_path()
src/gfx/pch/gfx_pch.h
@@ -1,11 +0,0 @@
/* Stdlib */
#include <stdint.h>

/* Project */
#include "gfx.h"

/* Commonly used runtime headers */
#include "runtime/runtime.h"
#include "runtime/threading.h"
#include "runtime/mem_arena.h"
#include "runtime/config.h"
src/gfx/render_list.c
@@ -1,207 +0,0 @@
#include "render_list.h"
#include "renderer_api.h"

#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"

#include <string.h>

RT_CVAR_I(rt_RenderListPoolSize,
          "Size of the pool allocated for render lists in bytes. Default: 8 MiB",
          RT_MB(8));

typedef struct {
    size_t size;
    const char *name;
} rt_render_object_type_data;

typedef struct rt_list_pool_s {
    size_t capacity;
    struct rt_list_pool_s *next;
} rt_list_pool;

typedef struct {
    rt_mutex *lock;
    rt_list_pool *first_free;
    rt_arena arena;

    unsigned int access_frame_id;
} rt_frame_lists;

#define DEFAULT_LIST_CAPACITY RT_KB(1)

static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1];
static unsigned int _type_count = 0;
static rt_rwlock _type_lock;

static rt_frame_lists _frame_lists[4];
static unsigned int _max_frames_in_flight;

rt_result InitRenderLists(void) {
    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok)
        return RT_UNKNOWN_ERROR;
    _type_lock = lock_res.lock;

    _max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
    RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frame_lists),
              "Invalid maximum number of in-flight frames.");

    for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
        rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
        if (!arena_res.ok) {
            rtDestroyRWLock(&_type_lock);
            return RT_OUT_OF_MEMORY;
        }
        _frame_lists[i].arena = arena_res.arena;

        _frame_lists[i].lock = rtCreateMutex();
        if (!_frame_lists[i].lock) {
            rtReleaseArena(&_frame_lists[i].arena);
            rtDestroyRWLock(&_type_lock);
            return RT_UNKNOWN_ERROR;
        }

        _frame_lists[i].first_free = NULL;
        _frame_lists[i].access_frame_id = 0;
    }

    return RT_SUCCESS;
}

void ShutdownRenderLists(void) {
    rtDestroyRWLock(&_type_lock);
    for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
        rtDestroyMutex(_frame_lists[i].lock);
        rtReleaseArena(&_frame_lists[i].arena);
    }
}

RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
                                                              const char *debug_name) {
    if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) {
        rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE);
        return RT_INVALID_RENDER_OBJECT_TYPE;
    }
    rtLockWrite(&_type_lock);
    rt_render_object_type type = (rt_render_object_type)++_type_count;
    _types[_type_count].size = object_size;
    _types[_type_count].name = debug_name;
    if (debug_name)
        rtLog("GFX",
              "Registered render object type %s; object size: %zu. Type: %u",
              debug_name,
              object_size,
              _type_count);
    else
        rtLog("GFX",
              "Registered unnamed render object type; object size: %zu. Type: %u",
              object_size,
              _type_count);
    rtUnlockWrite(&_type_lock);
    return type;
}

RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) {
    size_t size = 0;
    rtLockRead(&_type_lock);
    if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
        size = _types[type].size;
    rtUnlockRead(&_type_lock);
    return size;
}

RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type) {
    const char *name = NULL;
    rtLockRead(&_type_lock);
    if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
        name = _types[type].name;
    rtUnlockRead(&_type_lock);
    return name;
}

static rt_create_render_list_result
CreateNewList(rt_render_object_type type, unsigned int frame_id, size_t capacity) {
    rt_create_render_list_result res = {.ok = false};
    unsigned int slot = frame_id % _max_frames_in_flight;
    rtLockMutex(_frame_lists[slot].lock);

    _frame_lists[slot].access_frame_id = frame_id;

    if (!_frame_lists[slot].first_free ||
        _frame_lists[slot].first_free->capacity < capacity) { /* Allocate a new list */
        rt_list_pool *pool = rtArenaPush(&_frame_lists[slot].arena,
                                         sizeof(rt_list_pool) + sizeof(unsigned int) + capacity);
        if (!pool) {
            rtReportError("GFX",
                          "Out of render list pool space! Configured space: %d KiB",
                          rt_RenderListPoolSize.i / 1024);
            goto out;
        }
        pool->capacity = capacity;
        pool->next = _frame_lists[slot].first_free;
        _frame_lists[slot].first_free = pool;
    }
    rt_render_list list;
    unsigned int *frame_id_store =
        (unsigned int *)((char *)_frame_lists[slot].first_free + sizeof(rt_list_pool));
    *frame_id_store = frame_id;
    list.data = (char *)_frame_lists[slot].first_free + sizeof(rt_list_pool) + sizeof(unsigned int);
    list.type = type;
    list.length = 0;
    res.ok = true;
    res.list = list;
    _frame_lists[slot].first_free = _frame_lists[slot].first_free->next;
out:
    rtUnlockMutex(_frame_lists[slot].lock);
    return res;
}

RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type,
                                                             unsigned int frame_id) {
    return CreateNewList(type, frame_id, DEFAULT_LIST_CAPACITY);
}

void ResetRenderLists(unsigned int frame_id) {
    unsigned int slot = frame_id % _max_frames_in_flight;
    RT_ASSERT(_frame_lists[slot].access_frame_id == frame_id ||
                  _frame_lists[slot].access_frame_id == 0,
              "Frame id mismatch");
    rtLockMutex(_frame_lists[slot].lock);
    _frame_lists[slot].first_free = NULL;
    _frame_lists[slot].access_frame_id = 0;
    rtArenaClear(&_frame_lists[slot].arena);
    rtUnlockMutex(_frame_lists[slot].lock);
}

RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) {
    size_t object_size = rtGetRenderObjectSize(list->type);
    rt_list_pool *pool =
        (rt_list_pool *)((char *)list->data - sizeof(rt_list_pool) - sizeof(unsigned int));
    unsigned int frame_id = *(unsigned int *)((char *)list->data - sizeof(unsigned int));
    size_t list_capacity = pool->capacity / object_size;

    if (list->length == list_capacity) {
        /* "Grow" the list */
        rt_create_render_list_result list_res =
            CreateNewList(list->type, frame_id, pool->capacity * 2);
        if (!list_res.ok)
            return false;

        memcpy(list_res.list.data, list->data, list->length * object_size);

        unsigned int slot = frame_id % _max_frames_in_flight;
        rtLockMutex(_frame_lists[slot].lock);
        pool->next = _frame_lists[slot].first_free;
        _frame_lists[slot].first_free = pool;
        rtUnlockMutex(_frame_lists[slot].lock);

        list_res.list.length = list->length;
        *list = list_res.list;
    }

    char *dst = (char *)list->data + list->length * object_size;
    memcpy(dst, object, object_size);
    ++list->length;
    return true;
}
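A note on the memory layout the code above relies on; this describes the blocks handed out by CreateNewList:

/* Layout of one block allocated by CreateNewList on the per-frame arena:
 *
 *   [rt_list_pool header][unsigned int frame_id][capacity bytes of object data]
 *
 * list.data points at the object data. rtPushRenderListEntry walks back
 * sizeof(rt_list_pool) + sizeof(unsigned int) bytes to recover the pool
 * header and the frame id when the list has to grow, so lists can be grown
 * without storing any bookkeeping inside rt_render_list itself. */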
src/gfx/render_list.h
@@ -1,70 +0,0 @@
#ifndef RT_RENDER_LIST_H
#define RT_RENDER_LIST_H

/* A render list collects render objects. */

#include <stdint.h>
#include <stdbool.h>

#include "runtime/runtime.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Identifies a type of render objects. */
typedef uint32_t rt_render_object_type;
typedef uint32_t rt_render_object_type_mask;

enum {
    RT_INVALID_RENDER_OBJECT_TYPE = 0,
    RT_MAX_RENDER_OBJECT_TYPE = 32,
};

#define RT_RENDER_OBJECT_TYPE_BIT(type) (1u << ((type)-1))

/* Registers a new render object type.
 * debug_name is optional and may be NULL. */
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
                                                              const char *debug_name);

RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type);

RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type);

typedef struct {
    rt_render_object_type type;
    size_t length;
    void *data;
} rt_render_list;

/* Returns a pointer to the i-th render list element.
 * Works for every valid type, because the size is determined at runtime. */
RT_INLINE void *rtGetRenderListElement(const rt_render_list *list, size_t index) {
    size_t size = rtGetRenderObjectSize(list->type);
    return (char *)list->data + size * index;
}

/* Returns the i-th render list element, cast to type T.
 * Saves an rtGetRenderObjectSize call if the type is known beforehand. */
#define RT_GET_RENDER_LIST_ELEMENT(list, T, index) (*(((T *)(list).data) + (index)))

typedef struct {
    bool ok;
    rt_render_list list;
} rt_create_render_list_result;

/* Create a render list for a particular object type.
 *
 * Render lists have a lifetime of one frame. */
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type,
                                                             unsigned int frame_id);

/* Append a render object to a list. The object must be of the correct type. */
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object);

#ifdef __cplusplus
}
#endif

#endif
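A sketch of the intended usage of this header, assuming the gfx module is initialized; `rt_my_sprite` and the functions wrapping the calls are hypothetical:

/* Sketch: register an object type once at startup, fill a per-frame list. */
typedef struct {
    float x, y;
    uint32_t texture;
} rt_my_sprite; /* hypothetical user-defined render object */

static rt_render_object_type g_sprite_type;

void ExampleStartup(void) {
    g_sprite_type = rtRegisterRenderObjectType(sizeof(rt_my_sprite), "sprite");
}

void ExampleBuildFrame(unsigned int frame_id) {
    rt_create_render_list_result res = rtCreateRenderList(g_sprite_type, frame_id);
    if (!res.ok)
        return;
    rt_my_sprite sprite = {.x = 0.f, .y = 0.f, .texture = 0};
    rtPushRenderListEntry(&res.list, &sprite);
    /* typed access without an rtGetRenderObjectSize call: */
    rt_my_sprite first = RT_GET_RENDER_LIST_ELEMENT(res.list, rt_my_sprite, 0);
    (void)first;
}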
src/gfx/render_view.c
@@ -1,122 +0,0 @@
#include "render_view.h"
#include "renderer_api.h"

#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"

RT_CVAR_I(rt_RenderViewArenaSize,
          "Size of the memory arena used for allocating render views. Default: 1 MB",
          RT_MB(1));

typedef struct {
    rt_arena arena;
    rt_mutex *lock;
    uint32_t frame_id;
} rt_frame_views;

static rt_frame_views _frames[4];
static unsigned int _max_frames_in_flight;

rt_result InitRenderViews(void) {
    _max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
    RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frames),
              "Invalid maximum number of in-flight frames.");

    for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
        rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i);
        if (!arena_res.ok)
            return RT_OUT_OF_MEMORY;
        _frames[i].arena = arena_res.arena;
        _frames[i].lock = rtCreateMutex();
        if (!_frames[i].lock) {
            rtReleaseArena(&_frames[i].arena);
            return RT_UNKNOWN_ERROR;
        }
        _frames[i].frame_id = 0;
    }
    return RT_SUCCESS;
}

void ShutdownRenderViews(void) {
    for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
        rtDestroyMutex(_frames[i].lock);
        rtReleaseArena(&_frames[i].arena);
    }
}

void ResetRenderViews(unsigned int frame_id) {
    unsigned int slot = frame_id % _max_frames_in_flight;
    rtArenaClear(&_frames[slot].arena);
}

RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
                                                             uint32_t type_count,
                                                             unsigned int frame_id) {
#ifdef RT_DEBUG
    for (uint32_t i = 0; i < type_count - 1; ++i) {
        for (uint32_t j = i + 1; j < type_count; ++j) {
            RT_ASSERT(types[i] != types[j], "Duplicate render list type detected.");
        }
    }
#endif

    unsigned int slot = frame_id % _max_frames_in_flight;

    size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_object_type));
    rtLockMutex(_frames[slot].lock);
    void *storage = rtArenaPush(&_frames[slot].arena, size);
    _frames[slot].frame_id = frame_id;
    rtUnlockMutex(_frames[slot].lock);
    if (!storage) {
        return (rt_create_render_view_result){
            .ok = false,
        };
    }

    rt_render_view view;
    view.lists = storage;
    view.list_types = (rt_render_object_type *)(view.lists + type_count);
    view.list_count = type_count;
    view.type_mask = 0;

    for (uint32_t i = 0; i < type_count; ++i) {
        rt_create_render_list_result list_res = rtCreateRenderList(types[i], frame_id);
        if (!list_res.ok) {
            return (rt_create_render_view_result){
                .ok = false,
            };
        }
        view.lists[i] = list_res.list;
        view.list_types[i] = types[i];
        view.type_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
    }

    return (rt_create_render_view_result){.ok = true, .view = view};
}

RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object) {
    for (uint32_t i = 0; i < view->list_count; ++i) {
        if (view->list_types[i] == type)
            return rtPushRenderListEntry(&view->lists[i], object);
    }
    return false;
}

RT_DLLEXPORT void
rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id, unsigned int frame_id) {
    g_renderer.SubmitRenderView(render_graph, pass_id, view, frame_id);
}

RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
                                         const rt_render_object_type *types,
                                         uint32_t type_count) {
    if (view.list_count != type_count)
        return 0;
    for (uint32_t i = 0; i < type_count; ++i) {
        if (view.list_types[i] != types[i])
            return 0;
    }
    return 1;
}
src/gfx/render_view.h
@@ -1,63 +0,0 @@
#ifndef RT_GFX_RENDER_VIEW_H
#define RT_GFX_RENDER_VIEW_H

/* A render view acts as a container of one or more render lists.
 * Each view is processed by exactly one pass. */

#include "render_list.h"

typedef struct rt_render_graph_s rt_render_graph;

typedef struct {
    rt_render_list *lists;
    rt_render_object_type *list_types;
    uint32_t list_count;
    rt_render_object_type_mask type_mask;
} rt_render_view;

typedef struct {
    bool ok;
    rt_render_view view;
} rt_create_render_view_result;

#ifdef __cplusplus
extern "C" {
#endif

RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
                                                             uint32_t type_count,
                                                             unsigned int frame_id);

RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object);

RT_DLLEXPORT void rtSubmitRenderView(rt_render_view view,
                                     rt_render_graph *render_graph,
                                     uint32_t pass_id,
                                     unsigned int frame_id);

/* Checks if the view contains exactly the given types in the given order */
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
                                         const rt_render_object_type *types,
                                         uint32_t type_count);

/* Checks if the view contains exactly the given types, in any order */
RT_INLINE static int
rtDoViewTypesMatch(rt_render_view view, const rt_render_object_type *types, uint32_t type_count) {
    rt_render_object_type_mask in_mask = 0;
    for (uint32_t i = 0; i < type_count; ++i) {
        in_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
    }
    return view.type_mask == in_mask;
}

RT_INLINE static int rtDoesViewContainTypes(rt_render_view view,
                                            rt_render_object_type_mask type_mask) {
    return (int)(view.type_mask & type_mask);
}

#ifdef __cplusplus
}
#endif

#endif
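A usage sketch tying views, lists, and submission together; `graph`, `mesh_pass_id`, and `frame_id` are assumed to come from the framegraph setup and main loop:

/* Sketch: a pass that renders meshes builds a view with one mesh list,
 * fills it, and hands it to the render graph. */
extern rt_render_graph *graph;   /* assumed: built via the render graph builder */
extern uint32_t mesh_pass_id;    /* assumed: from rtCalculateRenderPassID */
extern unsigned int frame_id;

void ExampleSubmitMeshes(void) {
    rt_render_object_type types[] = {g_builtin_render_object_types.render_mesh};
    rt_create_render_view_result view_res = rtCreateRenderView(types, 1, frame_id);
    if (!view_res.ok)
        return;
    rt_render_mesh mesh = {0}; /* pipeline/vbo/ibo/counts from loaded assets */
    rtPushRenderObjectToView(&view_res.view, types[0], &mesh);
    rtSubmitRenderView(view_res.view, graph, mesh_pass_id, frame_id);
}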
src/gfx/renderer_api.h
@@ -1,367 +0,0 @@
#ifndef RT_GFX_BACKEND_H
#define RT_GFX_BACKEND_H

/* Backend functions and types. */

#include <stddef.h>

#include "gfx.h"
#include "render_list.h"
#include "render_view.h"

#include "runtime/resources.h"
#include "runtime/rt_math.h"
#include "runtime/runtime.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Handles for backend objects */

#define RT_RENDER_BACKEND_HANDLE_MAX_INDEX ((1u << 24) - 1)
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255

#define RT_RENDER_BACKEND_HANDLE(name)                                                             \
    typedef struct {                                                                               \
        uint32_t version : 8;                                                                      \
        uint32_t index : 24;                                                                       \
    } name

RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);

#undef RT_RENDER_BACKEND_HANDLE

#define RT_COMPARE_RENDER_HANDLES(_A, _B, _Comp) ((*(uint32_t *)&(_A)) _Comp (*(uint32_t *)&(_B)))
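The version/index split lets a backend detect stale handles: when an object slot is reused, its version is bumped, so old handles no longer match. A sketch of how a backend could validate a handle against its object table; the table type and size are assumptions for illustration, not part of the removed code:

#define RT_EXAMPLE_MAX_PIPELINES 1024

typedef struct {
    uint8_t versions[RT_EXAMPLE_MAX_PIPELINES]; /* current version per slot */
} rt_example_pipeline_table;

static bool rtExampleIsPipelineHandleCurrent(const rt_example_pipeline_table *table,
                                             rt_pipeline_handle handle) {
    return handle.index < RT_EXAMPLE_MAX_PIPELINES &&
           table->versions[handle.index] == handle.version;
}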
||||||
/* Init data for the renderer */
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
struct HINSTANCE__;
|
|
||||||
struct HWND__;
|
|
||||||
#elif defined(RT_USE_XLIB)
|
|
||||||
struct _XDisplay;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct rt_renderer_init_info_s {
|
|
||||||
#ifdef _WIN32
|
|
||||||
struct HINSTANCE__ *hInstance;
|
|
||||||
struct HWND__ *hWnd;
|
|
||||||
#elif defined(RT_USE_XLIB)
|
|
||||||
struct _XDisplay *display;
|
|
||||||
unsigned long window;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Argument types for render commands */
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_GRAPHICS_QUEUE,
|
|
||||||
RT_COMPUTE_QUEUE,
|
|
||||||
RT_TRANSFER_QUEUE,
|
|
||||||
} rt_gpu_queue;
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* Attributes are used to bind buffers (or textures) to symbolic values.
|
|
||||||
* For example, an attribute might be bound to "CELL_GRID", which would be
|
|
||||||
* replaced with the (at the time of the invoke) grid buffer of the current
|
|
||||||
* world cell.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_ATTRIBUTE_VALUE_UNDEFINED,
|
|
||||||
|
|
||||||
RT_ATTRIBUTE_VALUE_MATERIAL_ALBEDO,
|
|
||||||
RT_ATTRIBUTE_VALUE_MATERIAL_NORMAL,
|
|
||||||
|
|
||||||
RT_ATTRIBUTE_VALUE_count
|
|
||||||
} rt_attribute_value;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint32_t index;
|
|
||||||
rt_attribute_value value;
|
|
||||||
} rt_attribute_binding;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_SHADER_TYPE_INVALID,
|
|
||||||
RT_SHADER_TYPE_VULKAN,
|
|
||||||
RT_SHADER_TYPE_DX11,
|
|
||||||
|
|
||||||
RT_SHADER_TYPE_count,
|
|
||||||
} rt_shader_type;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_SHADER_STAGE_VERTEX,
|
|
||||||
RT_SHADER_STAGE_FRAGMENT,
|
|
||||||
RT_SHADER_STAGE_COMPUTE,
|
|
||||||
|
|
||||||
RT_SHADER_STAGE_count,
|
|
||||||
} rt_shader_stage;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
rt_shader_type type;
|
|
||||||
rt_shader_stage stage;
|
|
||||||
rt_relptr bytecode;
|
|
||||||
size_t bytecode_length;
|
|
||||||
} rt_shader_info;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
rt_gpu_queue target_queue;
|
|
||||||
} rt_alloc_command_buffer_info;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const rt_command_buffer_handle *command_buffers;
|
|
||||||
const rt_gpu_semaphore_handle *wait_semaphores;
|
|
||||||
const uint64_t *wait_values;
|
|
||||||
const rt_gpu_semaphore_handle *signal_semaphores;
|
|
||||||
const uint64_t *signal_values;
|
|
||||||
uint32_t command_buffer_count;
|
|
||||||
uint32_t wait_semaphore_count;
|
|
||||||
uint32_t signal_semaphore_count;
|
|
||||||
} rt_submit_command_buffers_info;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
/* Optional, for debug purposes */
|
|
||||||
const char *name;
|
|
||||||
uint64_t initial_value;
|
|
||||||
} rt_gpu_semaphore_info;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_BUFFER_TYPE_VERTEX,
|
|
||||||
RT_BUFFER_TYPE_INDEX,
|
|
||||||
RT_BUFFER_TYPE_UNIFORM,
|
|
||||||
RT_BUFFER_TYPE_STORAGE,
|
|
||||||
RT_BUFFER_TYPE_count
|
|
||||||
} rt_buffer_type;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
/* Create once, never change the data. */
|
|
||||||
RT_BUFFER_USAGE_STATIC,
|
|
||||||
|
|
||||||
/* Update occasionally (after a number of frames) */
|
|
||||||
RT_BUFFER_USAGE_DYNAMIC,
|
|
||||||
|
|
||||||
/* Create, use once and then discard */
|
|
||||||
RT_BUFFER_USAGE_TRANSIENT,
|
|
||||||
|
|
||||||
RT_BUFFER_USAGE_count,
|
|
||||||
} rt_buffer_usage;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
size_t size;
|
|
||||||
rt_buffer_type type;
|
|
||||||
rt_buffer_usage usage;
|
|
||||||
const void *data;
|
|
||||||
} rt_buffer_info;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_PASS_LOAD_MODE_LOAD,
|
|
||||||
RT_PASS_LOAD_MODE_CLEAR,
|
|
||||||
} rt_pass_load_mode;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
RT_PASS_WRITE_MODE_STORE,
|
|
||||||
RT_PASS_WRITE_MODE_DISCARD,
|
|
||||||
} rt_pass_write_mode;
|
|
||||||
|
|
||||||
typedef union {
|
|
||||||
rt_color color;
|
|
||||||
struct {
|
|
||||||
float depth;
|
|
||||||
int32_t stencil;
|
|
||||||
} depth_stencil;
|
|
||||||
} rt_pass_clear_value;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
float depth;
|
|
||||||
int32_t stencil;
|
|
||||||
} rt_depth_stencil_value;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
rt_render_target_handle color_buffers[4];
|
|
||||||
rt_pass_load_mode color_buffer_loads[4];
|
|
||||||
rt_pass_write_mode color_buffer_writes[4];
|
|
||||||
rt_pass_clear_value color_buffer_clear_values[4];
|
|
||||||
uint32_t color_buffer_count;
|
|
||||||
|
|
||||||
rt_render_target_handle depth_stencil_buffer;
|
|
||||||
rt_pass_load_mode depth_stencil_buffer_load;
|
|
||||||
rt_pass_write_mode depth_stencil_buffer_write;
|
|
||||||
rt_pass_clear_value depth_stencil_buffer_clear_value;
|
|
||||||
|
|
||||||
rt_rect2i render_area;
|
|
||||||
|
|
||||||
// For debug purposes, can be NULL
|
|
||||||
const char *name;
|
|
||||||
} rt_cmd_begin_pass_info;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
/* Unusable, must be transitioned to an usable state first. */
|
|
||||||
RT_RENDER_TARGET_STATE_INVALID,
|
|
||||||
|
|
||||||
/* Used as a color- or depth-buffer */
|
|
||||||
RT_RENDER_TARGET_STATE_ATTACHMENT,
|
|
||||||
|
|
||||||
RT_RENDER_TARGET_STATE_SAMPLED_IMAGE,
|
|
||||||
|
|
||||||
RT_RENDER_TARGET_STATE_STORAGE_IMAGE,
|
|
||||||
} rt_render_target_state;
|
|
||||||
|
|
||||||
#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0
|
|
||||||
|
|
||||||
/* Renderer API */
|
|
||||||
|
|
||||||
typedef struct rt_pipeline_info_s rt_pipeline_info;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char *name;
|
|
||||||
rt_pixel_format format;
|
|
||||||
unsigned int width;
|
|
||||||
unsigned int height;
|
|
||||||
|
|
||||||
unsigned int samples;
|
|
||||||
unsigned int layers;
|
|
||||||
} rt_attachment_info;
|
|
||||||
|
|
||||||
enum {
|
|
||||||
/* Bit 0 contains the type: 0 -> graphics, 1 -> compute */
|
|
||||||
RT_PASS_FLAG_GRAPHICS = 0x0000,
|
|
||||||
RT_PASS_FLAG_COMPUTE = 0x0001,
|
|
||||||
RT_PASS_FLAG_TYPE_MASK = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_GRAPHICS,
|
|
||||||
|
|
||||||
/* Always excecute the pass, even if no objects will be rendered. */
|
|
||||||
RT_PASS_FLAG_EXECUTE_ALWAYS = 0x0002,
|
|
||||||
};

typedef struct {
    const char *name;
    uint32_t flags;
} rt_pass_info;

typedef struct rt_render_graph_s rt_render_graph;
typedef rt_result rt_execute_render_pass_fn(uint32_t pass_id,
                                            rt_command_buffer_handle cmdbuf,
                                            const rt_render_view *views,
                                            unsigned int view_count,
                                            void *userdata);

typedef struct {
    void *obj;

    void (*AddRenderTarget)(void *obj, const rt_attachment_info *info);

    void (*SetBackbuffer)(void *obj, const char *rt_name);

    void (*AddRenderPass)(void *obj, const rt_pass_info *info);
    void (*AddColorOutput)(void *obj,
                           const char *pass_name,
                           const char *rt_name,
                           rt_pass_load_mode load,
                           rt_pass_write_mode write,
                           rt_color clear_color);
    void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
    void (*SetDepthStencilAttachment)(void *obj,
                                      const char *pass_name,
                                      const char *rt_name,
                                      rt_pass_load_mode load,
                                      rt_pass_write_mode write,
                                      rt_depth_stencil_value clear_value);
    void (*SetRenderArea)(void *obj,
                          const char *pass_name,
                          rt_rect2 area,
                          float min_depth,
                          float max_depth);
    void (*BindRenderPass)(void *obj,
                           const char *pass_name,
                           rt_execute_render_pass_fn *execute_fn,
                           void *userdata);

    rt_result (*Build)(void *obj, rt_render_graph **p_render_graph);
} rt_render_graph_builder;
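
/* Usage sketch for the builder interface. Pass and target names, the write
 * mode, and the clear value below are illustrative; only the call sequence is
 * prescribed by the interface:
 *
 *     rt_render_graph_builder b = g_renderer.CreateRenderGraphBuilder();
 *     b.AddRenderTarget(b.obj, &color_attachment_info);
 *     b.SetBackbuffer(b.obj, "color");
 *     b.AddRenderPass(b.obj, &geometry_pass_info);
 *     b.AddColorOutput(b.obj, "geometry", "color",
 *                      RT_PASS_LOAD_MODE_CLEAR, write_mode, clear_color);
 *     b.BindRenderPass(b.obj, "geometry", ExecuteGeometry, NULL);
 *     rt_render_graph *graph = NULL;
 *     rt_result res = b.Build(b.obj, &graph);
 */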

typedef void rt_register_renderer_cvars_fn(void);
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
typedef void rt_shutdown_renderer_fn(void);
typedef unsigned int rt_get_max_frames_in_flight_fn(void);
typedef void rt_begin_frame_fn(unsigned int frame_id);
typedef void rt_end_frame_fn(unsigned int frame_id);
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
                                              const rt_alloc_command_buffer_info *info,
                                              rt_command_buffer_handle *p_command_buffers);
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue,
                                               const rt_submit_command_buffers_info *info);
typedef rt_result
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);

typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph, unsigned int frame_id);
typedef void rt_submit_render_view_fn(rt_render_graph *render_graph,
                                      uint32_t pass_id,
                                      rt_render_view view,
                                      unsigned int frame_id);
typedef void rt_reset_render_graph_fn(rt_render_graph *graph, unsigned int frame_id);

typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
                                  const rt_cmd_begin_pass_info *info);
typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf,
                                                rt_render_target_handle render_target,
                                                rt_render_target_state new_state);
typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf,
                                                 rt_render_target_handle render_target);
typedef void rt_cmd_bind_pipeline_fn(rt_command_buffer_handle cmd, rt_pipeline_handle pipeline);
typedef void rt_cmd_bind_vertex_buffers_fn(rt_command_buffer_handle cmd,
                                           uint32_t first_binding,
                                           uint32_t count,
                                           const rt_buffer_handle *buffers,
                                           const uint32_t *strides,
                                           const uint32_t *offsets);
typedef void
rt_cmd_draw_fn(rt_command_buffer_handle cmdbuf, uint32_t first_vertex, uint32_t vertex_count);

typedef struct {
    rt_register_renderer_cvars_fn *RegisterCVars;
    rt_init_renderer_fn *Init;
    rt_shutdown_renderer_fn *Shutdown;
    rt_get_max_frames_in_flight_fn *GetMaxFramesInFlight;
    rt_begin_frame_fn *BeginFrame;
    rt_end_frame_fn *EndFrame;
    rt_compile_pipeline_fn *CompilePipeline;
    rt_destroy_pipeline_fn *DestroyPipeline;
    rt_alloc_command_buffers_fn *AllocCommandBuffers;
    rt_submit_command_buffers_fn *SubmitCommandBuffers;
    rt_create_buffers_fn *CreateBuffers;
    rt_destroy_buffers_fn *DestroyBuffers;

    /* Render graph functions */
    rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
    rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
    rt_execute_render_graph_fn *ExecuteRenderGraph;
    rt_submit_render_view_fn *SubmitRenderView;
    rt_reset_render_graph_fn *ResetRenderGraph;

    /* Command buffer functions */
    rt_cmd_begin_pass_fn *CmdBeginPass;
    rt_cmd_end_pass_fn *CmdEndPass;
    rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget;
    rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite;
    rt_cmd_bind_pipeline_fn *CmdBindPipeline;
    rt_cmd_bind_vertex_buffers_fn *CmdBindVertexBuffers;
    rt_cmd_draw_fn *CmdDraw;
} rt_renderer_api;

#define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name

#ifndef RT_DONT_DEFINE_RENDERER_GLOBAL
extern rt_renderer_api g_renderer;
#endif

#ifdef __cplusplus
}
#endif

#endif

@@ -1,9 +1,3 @@
subdir('runtime')
subdir('asset_compiler')
subdir('gfx')
subdir('app_framework')

# Renderer libs
subdir('renderer/vk')
subdir('renderer/null')
subdir('renderer/dx11')
@ -1,877 +0,0 @@
|
|||||||
#include <stdbool.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "gfx/effect.h"
|
|
||||||
#include "gfx/renderer_api.h"
|
|
||||||
#include "runtime/buffer_manager.h"
|
|
||||||
#include "runtime/handles.h"
|
|
||||||
#include "runtime/mem_arena.h"
|
|
||||||
|
|
||||||
#include "common_render_graph.h"
|
|
||||||
|
|
||||||
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
|
|
||||||
#define MAX_SAMPLED_INPUTS_PER_PASS 8
|
|
||||||
|
|
||||||
typedef struct rt_render_target_build_info {
|
|
||||||
const char *name;
|
|
||||||
rt_pixel_format format;
|
|
||||||
unsigned int width;
|
|
||||||
unsigned int height;
|
|
||||||
|
|
||||||
unsigned int samples;
|
|
||||||
unsigned int layers;
|
|
||||||
|
|
||||||
uint32_t first_usage;
|
|
||||||
uint32_t last_usage;
|
|
||||||
} rt_render_target_build_info;
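
/* first_usage/last_usage are indices into the final pass schedule (filled in
 * by DetermineRenderTargetUsage); two targets whose usage ranges do not
 * overlap are candidates for aliasing in GreedyMergeRenderTargets. */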

typedef struct rt_pass_build_info {
    const char *name;
    uint32_t flags;

    void *userdata;
    rt_execute_render_pass_fn *Execute;

    rt_rect2 render_area;
    float min_depth;
    float max_depth;

    uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
    rt_color color_attachment_clear_values[MAX_COLOR_ATTACHMENTS_PER_PASS];
    rt_pass_load_mode color_attachment_loads[MAX_COLOR_ATTACHMENTS_PER_PASS];
    rt_pass_write_mode color_attachment_writes[MAX_COLOR_ATTACHMENTS_PER_PASS];
    uint32_t color_attachment_count;

    uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
    uint32_t sampled_input_count;

    uint32_t depth_stencil_attachment;
    rt_depth_stencil_value depth_stencil_clear_value;
    rt_pass_load_mode depth_stencil_load;
    rt_pass_write_mode depth_stencil_write;

    uint32_t *dependencies;
    uint32_t dependency_count;
} rt_pass_build_info;

typedef struct {
    uint32_t signaled_by;
    uint32_t waited_on_by;
} rt_sync_point_build_info;

typedef struct rt_render_graph_builder_obj {
    rt_arena arena;

    rt_render_target_build_info *render_targets;
    uint32_t render_target_count;
    uint32_t render_target_capacity;

    rt_pass_build_info *passes;
    uint32_t pass_count;
    uint32_t pass_capacity;

    rt_physical_render_target_info *phys_render_targets;
    uint32_t phys_render_target_count;

    rt_sync_point_build_info *sync_points;
    uint32_t sync_point_count;

    uint32_t backbuffer;

    rt_render_graph_builder_platform_callbacks platform_cbs;
} rt_render_graph_builder_obj;

static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
    rt_render_graph_builder_obj *obj = _obj;

    if (obj->render_target_count == obj->render_target_capacity) {
        uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
        rt_render_target_build_info *tmp =
            RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_render_target_build_info, new_cap);
        if (obj->render_target_capacity)
            memcpy(tmp,
                   obj->render_targets,
                   sizeof(rt_render_target_build_info) * obj->render_target_capacity);
        obj->render_targets = tmp;
        obj->render_target_capacity = new_cap;
    }

    char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
    strcpy(name, info->name);
    obj->render_targets[obj->render_target_count].name = name;
    obj->render_targets[obj->render_target_count].format = info->format;
    obj->render_targets[obj->render_target_count].width = info->width;
    obj->render_targets[obj->render_target_count].height = info->height;
    obj->render_targets[obj->render_target_count].samples = info->samples;
    obj->render_targets[obj->render_target_count].layers = info->layers;
    obj->render_targets[obj->render_target_count].first_usage = 0;
    obj->render_targets[obj->render_target_count].last_usage = 0;
    ++obj->render_target_count;
}

static void SetBackbuffer(void *_obj, const char *rt_name) {
    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
        if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
            obj->backbuffer = i;
            return;
        }
    }
    rtLog("ren", "Tried to set backbuffer to unknown render target %s", rt_name);
}

static void AddRenderPass(void *_obj, const rt_pass_info *info) {
    rt_render_graph_builder_obj *obj = _obj;

    if (obj->pass_count == obj->pass_capacity) {
        uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
        rt_pass_build_info *tmp =
            RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_pass_build_info, new_cap);
        if (obj->pass_capacity)
            memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
        obj->passes = tmp;
        obj->pass_capacity = new_cap;
    }

    char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
    strcpy(name, info->name);
    obj->passes[obj->pass_count].name = name;
    obj->passes[obj->pass_count].flags = info->flags;
    obj->passes[obj->pass_count].color_attachment_count = 0;
    obj->passes[obj->pass_count].sampled_input_count = 0;
    obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
    obj->passes[obj->pass_count].dependencies = NULL;
    obj->passes[obj->pass_count].dependency_count = 0;

    ++obj->pass_count;
}

static void AddColorOutput(void *_obj,
                           const char *pass_name,
                           const char *rt_name,
                           rt_pass_load_mode load,
                           rt_pass_write_mode write,
                           rt_color clear_color) {
    uint32_t rt_index = UINT_MAX;

    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
        if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
            rt_index = i;
            break;
        }
    }
    if (rt_index == UINT_MAX) {
        rtLog("ren",
              "Tried to add unknown render target %s as color output to %s",
              rt_name,
              pass_name);
        return;
    }

    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (strcmp(obj->passes[i].name, pass_name) == 0) {
            if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
                rtLog("ren", "Too many color attachments in pass %s", pass_name);
                return; /* Bail out instead of writing past the attachment arrays */
            }
            obj->passes[i].color_attachment_clear_values[obj->passes[i].color_attachment_count] =
                clear_color;
            obj->passes[i].color_attachment_loads[obj->passes[i].color_attachment_count] = load;
            obj->passes[i].color_attachment_writes[obj->passes[i].color_attachment_count] = write;
            obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
            return;
        }
    }
    rtLog("ren",
          "Tried to add render target %s as color output to unknown render pass %s",
          rt_name,
          pass_name);
}

static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
    uint32_t rt_index = UINT_MAX;

    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
        if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
            rt_index = i;
            break;
        }
    }
    if (rt_index == UINT_MAX) {
        rtLog("ren",
              "Tried to add unknown render target %s as sampled input to %s",
              rt_name,
              pass_name);
        return;
    }

    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (strcmp(obj->passes[i].name, pass_name) == 0) {
            if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
                rtLog("ren", "Too many sampled inputs in pass %s", pass_name);
                return; /* Bail out instead of writing past the input array */
            }
            obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
            return;
        }
    }
    rtLog("ren",
          "Tried to add render target %s as sampled input to unknown render pass %s",
          rt_name,
          pass_name);
}

static void SetDepthStencilAttachment(void *_obj,
                                      const char *pass_name,
                                      const char *rt_name,
                                      rt_pass_load_mode load,
                                      rt_pass_write_mode write,
                                      rt_depth_stencil_value clear_value) {
    uint32_t rt_index = UINT_MAX;

    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
        if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
            rt_index = i;
            break;
        }
    }
    if (rt_index == UINT_MAX) {
        rtLog("ren",
              "Tried to add unknown render target %s as depth stencil attachment to %s",
              rt_name,
              pass_name);
        return;
    }

    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (strcmp(obj->passes[i].name, pass_name) == 0) {
            obj->passes[i].depth_stencil_attachment = rt_index;
            obj->passes[i].depth_stencil_clear_value = clear_value;
            obj->passes[i].depth_stencil_load = load;
            obj->passes[i].depth_stencil_write = write;
            return;
        }
    }
    rtLog("ren",
          "Tried to add render target %s as depth stencil attachment to unknown render pass %s",
          rt_name,
          pass_name);
}

static void SetRenderArea(void *_obj,
                          const char *pass_name,
                          rt_rect2 area,
                          float min_depth,
                          float max_depth) {
    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (strcmp(obj->passes[i].name, pass_name) == 0) {
            obj->passes[i].render_area = area;
            obj->passes[i].min_depth = min_depth;
            obj->passes[i].max_depth = max_depth;
            return;
        }
    }
    rtLog("ren", "Tried to set the render area of unknown render pass %s.", pass_name);
}

static void BindRenderPass(void *_obj,
                           const char *pass_name,
                           rt_execute_render_pass_fn *execute_fn,
                           void *userdata) {
    rt_render_graph_builder_obj *obj = _obj;
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (strcmp(obj->passes[i].name, pass_name) == 0) {
            obj->passes[i].Execute = execute_fn;
            obj->passes[i].userdata = userdata;
            return;
        }
    }
    rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
}

typedef struct {
    uint32_t added;
    uint32_t moved;
} rt_find_writers_result;
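
/* added: number of writer passes newly appended to p_passes;
 * moved: number of writers that were already present and were shifted to the
 * back of the list so they sort after the pass that consumes rt_index. */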

static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
                                          uint32_t rt_index,
                                          uint32_t append_at,
                                          uint32_t *p_passes) {
    rt_find_writers_result res = {0, 0};
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        bool writes_rt = false;
        if (obj->passes[i].depth_stencil_attachment == rt_index) {
            writes_rt = true;
        } else {
            for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
                if (obj->passes[i].color_attachments[j] == rt_index) {
                    writes_rt = true;
                }
            }
        }

        if (!writes_rt)
            continue;

        uint32_t lower_index = UINT32_MAX;
        for (uint32_t j = 0; j < append_at; ++j) {
            if (p_passes[j] == i) {
                lower_index = j;
                break;
            }
        }

        if (lower_index == UINT32_MAX) {
            p_passes[append_at++] = i;
            res.added++;
        } else {
            memmove(&p_passes[lower_index],
                    &p_passes[lower_index + 1],
                    (append_at - lower_index - 1) * sizeof(uint32_t));
            p_passes[append_at - 1] = i;
            res.moved++;
        }
    }
    return res;
}

static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
                                    uint32_t search_rt,
                                    uint32_t append_at,
                                    uint32_t *p_order) {
    rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
    uint32_t new_append = append_at + writers.added;
    for (uint32_t i = 0; i < writers.moved; ++i) {
        uint32_t pass_idx = p_order[append_at - writers.moved + i];
        const rt_pass_build_info *pass = &obj->passes[pass_idx];
        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
        }
    }
    for (uint32_t i = 0; i < writers.added; ++i) {
        uint32_t pass_idx = p_order[append_at + i];
        const rt_pass_build_info *pass = &obj->passes[pass_idx];
        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
        }
    }
    return new_append;
}

static rt_result
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
    uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
    if (!order)
        return RT_OUT_OF_MEMORY;
    uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);

    /* The pass writing the backbuffer is first now; we need to reverse the order. */
    for (uint32_t i = 0; i < count / 2; ++i) {
        uint32_t t = order[i];
        order[i] = order[count - i - 1];
        order[count - i - 1] = t;
    }
    *p_order = order;
    *p_count = count;
    return RT_SUCCESS;
}

static uint32_t *
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
    /* Our goal is to calculate a schedule that:
     * A) does not break the dependency chain, and
     * B) has the maximum amount of overlap, i.e. keeps the GPU busy.
     * This means that if pass A depends on pass B, we want as many passes as possible
     * in between, to reduce the likelihood of stalls caused by A waiting for B to finish. */
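    /* Example: with passes G -> S -> P, where S samples G's output and P samples
     * S's output, an independent pass U is best scheduled between G and S or
     * between S and P, where it can overlap with the dependent work instead of
     * sitting at either end of the schedule. */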
    uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
    if (!schedule)
        return NULL;
    uint32_t scheduled_count = 0;

    while (scheduled_count < pass_count) {
        /* The number of passes remaining in naive_order */
        uint32_t unscheduled_count = pass_count - scheduled_count;

        /* It is always valid to use the front */
        uint32_t selected_idx = 0;
        uint32_t selected_score = 0;
        for (uint32_t i = 0; i < unscheduled_count; ++i) {
            /* Check if any dependency is not scheduled yet */
            uint32_t pass_idx = naive_order[i];
            const rt_pass_build_info *pass = &obj->passes[pass_idx];
            uint32_t score = 0;
            bool is_valid = true;

            if (pass->dependency_count) {
                for (uint32_t j = 0; j < unscheduled_count; ++j) {
                    uint32_t pass2_idx = naive_order[j];
                    for (uint32_t k = 0; k < pass->dependency_count; ++k) {
                        if (pass->dependencies[k] == pass2_idx) {
                            is_valid = false;
                            break;
                        }
                    }
                    if (!is_valid)
                        break;
                }
                if (!is_valid)
                    continue;

                /* Prefer passes whose dependencies were scheduled long ago */
                for (uint32_t j = 0; j < pass->dependency_count; ++j) {
                    for (uint32_t k = 0; k < scheduled_count; ++k) {
                        if (schedule[k] == pass->dependencies[j]) {
                            score += scheduled_count - k;
                            break;
                        }
                    }
                }

            } else {
                score = UINT32_MAX;
            }

            if (score > selected_score) {
                selected_score = score;
                selected_idx = i;
            }
        }

        schedule[scheduled_count++] = naive_order[selected_idx];
        memmove(&naive_order[selected_idx],
                &naive_order[selected_idx + 1],
                (unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
    }
    return schedule;
}

static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe it's possible
     * to combine the two. */
    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
        rt_pass_build_info *pass = &obj->passes[pass_idx];
        uint32_t dependency_capacity = pass->sampled_input_count;
        if (dependency_capacity) {
            pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
            if (!pass->dependencies)
                return RT_OUT_OF_MEMORY;
        }
        for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
            uint32_t rt_index = pass->sampled_inputs[input_idx];
            for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
                const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
                bool is_dependency = false;
                if (candidate->depth_stencil_attachment == rt_index)
                    is_dependency = true;
                for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
                    if (candidate->color_attachments[j] == rt_index)
                        is_dependency = true;
                }

                if (!is_dependency)
                    continue;

                if (pass->dependency_count == dependency_capacity) {
                    /* The dependencies are still on top of the arena, so we can just
                     * grow that array. */
                    if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
                        return RT_OUT_OF_MEMORY;
                    dependency_capacity *= 2;
                }
                pass->dependencies[pass->dependency_count++] = candidate_idx;
            }
        }
    }
    return RT_SUCCESS;
}

static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
                                       uint32_t pass_count,
                                       const uint32_t *schedule) {
    for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
        rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
        rt->first_usage = UINT32_MAX;
        rt->last_usage = 0;
        for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
            uint32_t pass_idx = schedule[sched_idx];
            const rt_pass_build_info *pass = &obj->passes[pass_idx];
            bool usage = pass->depth_stencil_attachment == rt_idx;
            if (!usage) {
                for (unsigned int i = 0; i < pass->color_attachment_count; ++i) {
                    if (pass->color_attachments[i] == rt_idx)
                        usage = true;
                }
            }
            if (!usage) {
                for (unsigned int i = 0; i < pass->sampled_input_count; ++i) {
                    if (pass->sampled_inputs[i] == rt_idx)
                        usage = true;
                }
            }
            if (usage) {
                if (sched_idx < rt->first_usage)
                    rt->first_usage = sched_idx;
                if (sched_idx > rt->last_usage)
                    rt->last_usage = sched_idx;
            }
        }
    }
}

static rt_result GreedyMergeRenderTargets(rt_render_graph_builder_obj *obj) {
    typedef struct {
        rt_physical_render_target_info info;
        int alive;
        int backbuffer;
        uint32_t first_usage;
        uint32_t last_usage;
    } merged_rts;
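
    /* Two candidates can share one physical target if their schedule lifetimes
     * are disjoint and their descriptions match. For example, a shadow map used
     * only by early passes can alias a post-process buffer that is first needed
     * later, provided both have the same size, format, samples and layers. */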
    merged_rts *merged = RT_ARENA_PUSH_ARRAY(&obj->arena, merged_rts, 2 * obj->render_target_count);
    if (!merged) {
        return RT_OUT_OF_MEMORY;
    }
    uint32_t candidate_count = obj->render_target_count;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        merged[i].alive = 1;
        merged[i].backbuffer = (i == obj->backbuffer);
        merged[i].info.format = obj->render_targets[i].format;
        merged[i].info.width = obj->render_targets[i].width;
        merged[i].info.height = obj->render_targets[i].height;
        merged[i].info.layers = obj->render_targets[i].layers;
        merged[i].info.name = obj->render_targets[i].name;
        merged[i].info.samples = obj->render_targets[i].samples;
        merged[i].first_usage = obj->render_targets[i].first_usage;
        merged[i].last_usage = obj->render_targets[i].last_usage;
    }

    uint32_t *rt_mapping =
        RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->render_target_count);
    if (!rt_mapping)
        return RT_OUT_OF_MEMORY;
    for (uint32_t i = 0; i < obj->render_target_count; ++i)
        rt_mapping[i] = i;

    bool did_merge;
    do {
        did_merge = false;
        for (uint32_t first = 0; first < candidate_count - 1; ++first) {
            if (!merged[first].alive)
                continue;
            for (uint32_t second = first + 1; second < candidate_count; ++second) {
                if (!merged[second].alive)
                    continue;

                /* The lifetimes must not overlap */
                if (!((merged[first].last_usage < merged[second].first_usage) ||
                      (merged[second].last_usage < merged[first].first_usage)))
                    continue;

                /* The descriptions must match */
                if (!(merged[first].info.width == merged[second].info.width &&
                      merged[first].info.height == merged[second].info.height &&
                      merged[first].info.samples == merged[second].info.samples &&
                      merged[first].info.layers == merged[second].info.layers &&
                      merged[first].info.format == merged[second].info.format))
                    continue;

                merged[first].alive = 0;
                merged[second].alive = 0;

                merged_rts combined = {
                    .alive = 1,
                    .backbuffer = merged[first].backbuffer || merged[second].backbuffer,
                    .first_usage = RT_MIN(merged[first].first_usage, merged[second].first_usage),
                    .last_usage = RT_MAX(merged[first].last_usage, merged[second].last_usage),
                    .info = merged[first].info,
                };
                char *combined_name = rtArenaPush(&obj->arena,
                                                  strlen(merged[first].info.name) +
                                                      strlen(merged[second].info.name) + 2);
                if (!combined_name)
                    return RT_OUT_OF_MEMORY;
                strcpy(combined_name, merged[first].info.name);
                strcat(combined_name, "+");
                strcat(combined_name, merged[second].info.name);
                combined.info.name = combined_name;

                /* Update mappings. If index < render_target_count, then it refers to a
                 * logical render target. Otherwise, it refers to a merged render target. */
                if (first < obj->render_target_count) {
                    rt_mapping[first] = candidate_count;
                } else {
                    /* Find mappings that refer to this index and update them */
                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
                        if (rt_mapping[i] == first)
                            rt_mapping[i] = candidate_count;
                    }
                }
                if (second < obj->render_target_count) {
                    rt_mapping[second] = candidate_count;
                } else {
                    /* Find mappings that refer to this index and update them */
                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
                        if (rt_mapping[i] == second)
                            rt_mapping[i] = candidate_count;
                    }
                }

                RT_ASSERT(candidate_count < 2 * obj->render_target_count,
                          "More merge candidates than expected");
                merged[candidate_count++] = combined;
                did_merge = true;
                break;
            }
            if (did_merge)
                break;
        }
    } while (did_merge);

    uint32_t phys_count = 0;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        if (merged[i].alive)
            ++phys_count;
    }
    obj->phys_render_targets =
        RT_ARENA_PUSH_ARRAY(&obj->arena, rt_physical_render_target_info, phys_count);
    if (!obj->phys_render_targets)
        return RT_OUT_OF_MEMORY;
    obj->phys_render_target_count = 0;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        if (merged[i].alive) {
            uint32_t index = obj->phys_render_target_count;
            if (merged[i].backbuffer)
                obj->backbuffer = obj->phys_render_target_count;
            obj->phys_render_targets[obj->phys_render_target_count++] = merged[i].info;

            /* Update the mapping table */
            for (uint32_t j = 0; j < obj->render_target_count; ++j) {
                if (rt_mapping[j] == i)
                    rt_mapping[j] = index;
            }
        }
    }

    /* Update pass render target references */
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        if (obj->passes[i].depth_stencil_attachment < UINT_MAX)
            obj->passes[i].depth_stencil_attachment =
                rt_mapping[obj->passes[i].depth_stencil_attachment];
        for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j)
            obj->passes[i].color_attachments[j] = rt_mapping[obj->passes[i].color_attachments[j]];
        for (uint32_t j = 0; j < obj->passes[i].sampled_input_count; ++j)
            obj->passes[i].sampled_inputs[j] = rt_mapping[obj->passes[i].sampled_inputs[j]];
    }
    obj->backbuffer = rt_mapping[obj->backbuffer];

    return RT_SUCCESS;
}

static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
    RT_ASSERT(false, "Not implemented yet");
    return RT_UNKNOWN_ERROR;
}

static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
                                              const uint32_t *schedule) {
    uint32_t *execution_levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
    if (!execution_levels)
        return NULL;

    const rt_pass_build_info *passes = obj->passes;
    uint32_t pass_count = obj->pass_count;
    for (uint32_t i = 0; i < pass_count; ++i) {
        uint32_t level = 0;
        uint32_t pass_idx = schedule[i];
        for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
            uint32_t dep_idx = passes[pass_idx].dependencies[j];
            level = RT_MAX(execution_levels[dep_idx] + 1, level);
        }
        execution_levels[pass_idx] = level;
    }
    return execution_levels;
}

static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj,
                                          const uint32_t *order,
                                          const uint32_t *execution_levels) {
    size_t runtime_data_size = obj->platform_cbs.GetRuntimeDataSize();
    size_t required_size = sizeof(rt_render_graph);
    required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
    required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
    required_size += obj->pass_count * sizeof(rt_render_pass);
    required_size += obj->pass_count * runtime_data_size;

    size_t pass_attachment_size = 0;

    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        required_size += strlen(obj->passes[i].name) + 1;
        pass_attachment_size += obj->passes[i].color_attachment_count *
                                (sizeof(rt_render_target_handle) + sizeof(rt_color) +
                                 sizeof(rt_pass_load_mode) + sizeof(rt_pass_write_mode));
        pass_attachment_size +=
            obj->passes[i].sampled_input_count * sizeof(rt_render_target_handle);
    }
    required_size += pass_attachment_size;
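
    /* The graph lives in one allocation, laid out as:
     *   [rt_render_graph][render target handles][semaphores][passes]
     *   [per-pass attachment arrays][per-pass runtime data][pass name strings] */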
    rt_render_graph *graph = rtAllocBuffer(required_size);
    if (!graph)
        return NULL;
    memset(graph, 0, required_size);
    graph->render_targets = (rt_render_target_handle *)(graph + 1);
    graph->semaphores =
        (rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
    graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
    char *attachment_storage = (char *)(graph->passes + obj->pass_count);
    char *runtime_data = attachment_storage + pass_attachment_size;
    char *names = runtime_data + runtime_data_size * obj->pass_count;
    char *next_name = names;

    graph->render_target_count = obj->phys_render_target_count;
    graph->semaphore_count = obj->sync_point_count;
    graph->pass_count = obj->pass_count;

    for (uint32_t i = 0; i < obj->phys_render_target_count; ++i) {
        graph->render_targets[i] =
            obj->platform_cbs.CreateRenderTarget(&obj->phys_render_targets[i]);
    }

    for (uint32_t i = 0; i < obj->sync_point_count; ++i) {
        // TODO
        RT_NOT_IMPLEMENTED;
    }

    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        uint32_t passidx = order[i];
        size_t namelen = strlen(obj->passes[passidx].name);
        graph->passes[i].Execute = RT_VERIFY(obj->passes[passidx].Execute);
        graph->passes[i].user_data = obj->passes[passidx].userdata;
        graph->passes[i].flags = obj->passes[passidx].flags;
        graph->passes[i].id = rtCalculateRenderPassID(obj->passes[passidx].name, namelen);
        graph->passes[i].first_signal = 0;
        graph->passes[i].signal_count = 0;
        graph->passes[i].first_wait = 0;
        graph->passes[i].wait_count = 0;
        graph->passes[i].execution_level = execution_levels[passidx];
        graph->passes[i].render_area = obj->passes[passidx].render_area;
        graph->passes[i].min_depth = obj->passes[passidx].min_depth;
        graph->passes[i].max_depth = obj->passes[passidx].max_depth;

        /* NOTE: index with passidx (the scheduled pass), not the loop index i. */
        graph->passes[i].depth_stencil =
            (obj->passes[passidx].depth_stencil_attachment != UINT_MAX)
                ? graph->render_targets[obj->passes[passidx].depth_stencil_attachment]
                : (rt_render_target_handle)RT_INVALID_HANDLE;
        graph->passes[i].depth_stencil_clear_value = obj->passes[passidx].depth_stencil_clear_value;
        graph->passes[i].depth_stencil_load = obj->passes[passidx].depth_stencil_load;
        graph->passes[i].depth_stencil_write = obj->passes[passidx].depth_stencil_write;

        graph->passes[i].color_output_count = obj->passes[passidx].color_attachment_count;
        if (graph->passes[i].color_output_count) {
            graph->passes[i].color_outputs = (rt_render_target_handle *)attachment_storage;
            attachment_storage +=
                sizeof(rt_render_target_handle) * graph->passes[i].color_output_count;
            graph->passes[i].color_clear_values = (rt_color *)attachment_storage;
            attachment_storage += sizeof(rt_color) * graph->passes[i].color_output_count;
            graph->passes[i].color_loads = (rt_pass_load_mode *)attachment_storage;
            attachment_storage += sizeof(rt_pass_load_mode) * graph->passes[i].color_output_count;
            graph->passes[i].color_writes = (rt_pass_write_mode *)attachment_storage;
            attachment_storage += sizeof(rt_pass_write_mode) * graph->passes[i].color_output_count;

            for (uint32_t j = 0; j < graph->passes[i].color_output_count; ++j) {
                graph->passes[i].color_outputs[j] =
                    graph->render_targets[obj->passes[passidx].color_attachments[j]];
                graph->passes[i].color_clear_values[j] =
                    obj->passes[passidx].color_attachment_clear_values[j];
                graph->passes[i].color_loads[j] = obj->passes[passidx].color_attachment_loads[j];
                graph->passes[i].color_writes[j] = obj->passes[passidx].color_attachment_writes[j];
            }
        }

        graph->passes[i].sampled_input_count = obj->passes[passidx].sampled_input_count;
        if (graph->passes[i].sampled_input_count) {
            graph->passes[i].sampled_inputs = (rt_render_target_handle *)attachment_storage;
            attachment_storage +=
                sizeof(rt_render_target_handle) * graph->passes[i].sampled_input_count;

            for (uint32_t j = 0; j < graph->passes[i].sampled_input_count; ++j) {
                graph->passes[i].sampled_inputs[j] =
                    graph->render_targets[obj->passes[passidx].sampled_inputs[j]];
            }
        }

        graph->passes[i].runtime_data = (void *)(runtime_data + i * runtime_data_size);

        graph->passes[i].name = next_name;
        next_name += namelen + 1;
        memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1);
    }

    graph->backbuffer_index = obj->backbuffer;

    return graph;
}

static rt_result Build(void *_obj, rt_render_graph **p_graph) {
    rt_render_graph_builder_obj *obj = _obj;
    uint32_t *naive_order;
    uint32_t pass_count;
    rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
    if (res != RT_SUCCESS)
        return res;

    res = DeterminePassDependencies(obj);
    if (res != RT_SUCCESS)
        return res;

    uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
    if (!optimized_order)
        return RT_OUT_OF_MEMORY;

    DetermineRenderTargetUsage(obj, pass_count, optimized_order);
    res = GreedyMergeRenderTargets(obj);
    if (res != RT_SUCCESS)
        return res;

    uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
    if (!execution_levels)
        return RT_OUT_OF_MEMORY;

    if (obj->platform_cbs.RequireExplicitSynchronization()) {
        res = CreateSynchronizationPoints(obj);
        if (res != RT_SUCCESS)
            return res;
    } else {
        obj->sync_point_count = 0;
    }

    *p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
    return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
}

rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs) {
    // TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions.
    rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
    RT_ASSERT(obj, "Failed to allocate the builder object.");
    memset(obj, 0, sizeof(*obj));
    rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
    RT_ASSERT(arena_res.ok, "Failed to create the builder arena.");
    obj->arena = arena_res.arena;
    obj->platform_cbs = *platform_cbs;

    return (rt_render_graph_builder){
        .obj = obj,
        .AddRenderTarget = AddRenderTarget,
        .SetBackbuffer = SetBackbuffer,
        .AddRenderPass = AddRenderPass,
        .AddColorOutput = AddColorOutput,
        .AddSampledInput = AddSampledInput,
        .SetDepthStencilAttachment = SetDepthStencilAttachment,
        .SetRenderArea = SetRenderArea,
        .BindRenderPass = BindRenderPass,
        .Build = Build,
    };
}

void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder) {
    rt_render_graph_builder_obj *obj = builder->obj;
    rtReleaseArena(&obj->arena);
    free(obj);
    memset(builder, 0, sizeof(*builder));
}
@ -1,99 +0,0 @@
|
|||||||
#ifndef RT_RENDERER_COMMON_RENDER_GRAPH_H
|
|
||||||
#define RT_RENDERER_COMMON_RENDER_GRAPH_H
|
|
||||||
|
|
||||||
#include "gfx/renderer_api.h"
|
|
||||||
#include "runtime/mem_arena.h"
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char *name;
|
|
||||||
rt_pixel_format format;
|
|
||||||
unsigned int width;
|
|
||||||
unsigned int height;
|
|
||||||
|
|
||||||
unsigned int samples;
|
|
||||||
unsigned int layers;
|
|
||||||
} rt_physical_render_target_info;
|
|
||||||
|
|
||||||
typedef rt_render_target_handle
|
|
||||||
rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info);
|
|
||||||
typedef int rt_rgb_require_explicit_synchronization_fn(void);
|
|
||||||
typedef size_t rt_rgb_get_runtime_data_size_fn(void);
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
rt_rgb_create_render_target_fn *CreateRenderTarget;
|
|
||||||
rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization;
|
|
||||||
rt_rgb_get_runtime_data_size_fn *GetRuntimeDataSize;
|
|
||||||
} rt_render_graph_builder_platform_callbacks;
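
/* Sketch of how a backend without explicit synchronization (e.g. a D3D11-style
 * immediate backend) might fill these in; the function names are hypothetical:
 *
 *     static int RequireExplicitSync(void) { return 0; }
 *     static size_t GetRuntimeDataSize(void) { return sizeof(my_pass_runtime_data); }
 *
 *     rt_render_graph_builder_platform_callbacks cbs = {
 *         .CreateRenderTarget = MyCreateRenderTarget,
 *         .RequireExplicitSynchronization = RequireExplicitSync,
 *         .GetRuntimeDataSize = GetRuntimeDataSize,
 *     };
 *     rt_render_graph_builder builder = rtCreateRenderGraphBuilder(&cbs);
 */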

typedef struct {
    uint32_t flags;

    /* Used for cheap referencing */
    uint32_t id;

    /* Used for debug output */
    const char *name;

    /* Viewport info */
    rt_rect2 render_area;
    float min_depth;
    float max_depth;

    /* Render targets */
    rt_render_target_handle *color_outputs;
    rt_color *color_clear_values;
    rt_pass_load_mode *color_loads;
    rt_pass_write_mode *color_writes;
    uint32_t color_output_count;
    rt_render_target_handle depth_stencil;
    rt_depth_stencil_value depth_stencil_clear_value;
    rt_pass_load_mode depth_stencil_load;
    rt_pass_write_mode depth_stencil_write;
    rt_render_target_handle *sampled_inputs;
    uint32_t sampled_input_count;

    /* Used for parallelisation on the CPU side. All passes with execution level N can
     * be recorded in parallel, after passes with level N-1 have finished. */
    uint32_t execution_level;

    /* GFX layer function for executing the pass */
    rt_execute_render_pass_fn *Execute;
    void *user_data;

    /* Allocated by the backend, used during runtime */
    void *runtime_data;

    /* These refer to the semaphores array */
    uint32_t first_wait;
    uint32_t wait_count;
    uint32_t first_signal;
    uint32_t signal_count;
} rt_render_pass;
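
/* Recording sketch for the execution levels. SpawnRecordJob/WaitForRecordJobs
 * stand in for whatever job system the caller uses (hypothetical names). The
 * levels are dense, so the loop stops at the first empty level:
 *
 *     for (uint32_t level = 0;; ++level) {
 *         uint32_t spawned = 0;
 *         for (uint32_t i = 0; i < graph->pass_count; ++i) {
 *             if (graph->passes[i].execution_level == level) {
 *                 SpawnRecordJob(&graph->passes[i]);
 *                 ++spawned;
 *             }
 *         }
 *         if (spawned == 0)
 *             break;
 *         WaitForRecordJobs();
 *     }
 */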

struct rt_render_graph_s {
    rt_render_target_handle *render_targets;
    uint32_t render_target_count;

    rt_gpu_semaphore_handle *semaphores;
    uint32_t semaphore_count;

    rt_render_pass *passes;
    uint32_t pass_count;

    uint32_t backbuffer_index;
};

#ifdef __cplusplus
extern "C" {
#endif

rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs);

void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder);

#ifdef __cplusplus
}
#endif

#endif
@ -1,146 +0,0 @@
|
|||||||
#include <d3d11.h>
|
|
||||||
#include <d3d11_1.h>
|
|
||||||
|
|
||||||
#include "gfx/renderer_api.h"
|
|
||||||
#include "runtime/config.h"
|
|
||||||
#include "runtime/handles.h"
|
|
||||||
#include "runtime/threading_helpers.hpp"
|
|
||||||
|
|
||||||
#include "device_objects.hpp"
|
|
||||||
#include "gpu.hpp"
|
|
||||||
|
|
||||||
RT_CVAR_I(rt_Dx11MaxBuffers,
|
|
||||||
"Maximum number of simultaneously existing buffers. Default: 4096",
|
|
||||||
4096);
|
|
||||||
|
|
||||||
static rt_buffer *_buffers;
|
|
||||||
static rt_buffer *_first_free;
|
|
||||||
static rt_mutex *_lock;
|
|
||||||
|
|
||||||
rt_result InitBufferManagement() {
|
|
||||||
_buffers =
|
|
||||||
reinterpret_cast<rt_buffer *>(calloc((size_t)rt_Dx11MaxBuffers.i, sizeof(rt_buffer)));
|
|
||||||
if (!_buffers) {
|
|
||||||
return RT_OUT_OF_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
_lock = rtCreateMutex();
|
|
||||||
if (!_lock) {
|
|
||||||
free(_buffers);
|
|
||||||
return RT_UNKNOWN_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
_first_free = _buffers + 2;
|
|
||||||
for (int i = 0; i < rt_Dx11MaxBuffers.i; ++i) {
|
|
||||||
_buffers[i].next_free = &_buffers[i + 1];
|
|
||||||
}
|
|
||||||
return RT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ShutdownBufferManagement() {
|
|
||||||
for (int i = 0; i < rt_Dx11MaxBuffers.i; ++i) {
|
|
||||||
if (_buffers[i].buffer)
|
|
||||||
_buffers[i].buffer->Release();
|
|
||||||
}
|
|
||||||
free(_buffers);
|
|
||||||
rtDestroyMutex(_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
|
|
||||||
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxBuffers.i)
|
|
||||||
return nullptr;
|
|
||||||
auto lg = rtAutoLock(_lock);
|
|
||||||
if (handle.version != _buffers[handle.index].version)
|
|
||||||
return nullptr;
|
|
||||||
return &_buffers[handle.index];
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
|
|
||||||
const rt_buffer_info *info,
|
|
||||||
rt_buffer_handle *p_buffers) {
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
|
||||||
rtLockMutex(_lock);
|
|
||||||
rt_buffer *slot = _first_free;
|
|
||||||
if (slot)
|
|
||||||
_first_free = slot->next_free;
|
|
||||||
rtUnlockMutex(_lock);
|
|
||||||
|
|
||||||
if (!slot) {
|
|
||||||
rtLog("dx11", "Failed to allocate a command buffer slot.");
|
|
||||||
rtLockMutex(_lock);
|
|
||||||
for (uint32_t j = 0; j < i; ++j) {
|
|
||||||
rt_buffer *s = &_buffers[p_buffers[j].index];
|
|
||||||
s->next_free = _first_free;
|
|
||||||
_first_free = s;
|
|
||||||
_first_free = s;
|
|
||||||
}
|
|
||||||
rtUnlockMutex(_lock);
|
|
||||||
return RT_OUT_OF_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
|
|
||||||
if (info[i].usage == RT_BUFFER_USAGE_STATIC) {
|
|
||||||
usage = D3D11_USAGE_IMMUTABLE;
|
|
||||||
} else if (info[i].usage == RT_BUFFER_USAGE_DYNAMIC) {
|
|
||||||
usage = D3D11_USAGE_DEFAULT;
|
|
||||||
} else if (info[i].usage == RT_BUFFER_USAGE_TRANSIENT) {
|
|
||||||
usage = D3D11_USAGE_DYNAMIC;
|
|
||||||
}
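
        /* Note the D3D11 constraints behind this mapping: IMMUTABLE buffers
         * must be created with initial data, and DYNAMIC buffers require CPU
         * write access (see CPUAccessFlags below). */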

        UINT bind_flags = D3D11_BIND_UNORDERED_ACCESS;
        if (info[i].type == RT_BUFFER_TYPE_VERTEX)
            bind_flags = D3D11_BIND_VERTEX_BUFFER;
        else if (info[i].type == RT_BUFFER_TYPE_INDEX)
            bind_flags = D3D11_BIND_INDEX_BUFFER;
        else if (info[i].type == RT_BUFFER_TYPE_UNIFORM)
            bind_flags = D3D11_BIND_CONSTANT_BUFFER;
        else if (info[i].type == RT_BUFFER_TYPE_STORAGE)
            bind_flags = D3D11_BIND_UNORDERED_ACCESS;

        D3D11_BUFFER_DESC desc = {};
        desc.ByteWidth = static_cast<UINT>(((info[i].size + 15) / 16) * 16);
        desc.Usage = usage;
        desc.BindFlags = bind_flags;
        /* D3D11_USAGE_DYNAMIC requires CPU write access; CreateBuffer fails otherwise. */
        desc.CPUAccessFlags = (usage == D3D11_USAGE_DYNAMIC) ? D3D11_CPU_ACCESS_WRITE : 0;
        desc.MiscFlags = 0;
        desc.StructureByteStride = 1;

        D3D11_SUBRESOURCE_DATA data;
        data.pSysMem = info[i].data;
        data.SysMemPitch = 0;
        data.SysMemSlicePitch = 0;

        if (FAILED(
                g_gpu.device->CreateBuffer(&desc, info[i].data ? &data : nullptr, &slot->buffer))) {
            rtLog("dx11", "Failed to create a buffer.");
            auto lock_guard = rtAutoLock(_lock);
            for (uint32_t j = 0; j < i; ++j) {
                rt_buffer *s = &_buffers[p_buffers[j].index];
                s->next_free = _first_free;
                _first_free = s;
            }
            return RT_UNKNOWN_ERROR;
        }

        slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        const uint32_t index = (uint32_t)(slot - _buffers);
        p_buffers[i].version = slot->version;
        p_buffers[i].index = index;
    }

    return RT_SUCCESS;
}

extern "C" void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
    for (uint32_t i = 0; i < count; ++i) {
        if (!RT_IS_HANDLE_VALID(buffers[i]) || (int)buffers[i].index >= rt_Dx11MaxBuffers.i)
            continue;
        auto lg = rtAutoLock(_lock);
        if (buffers[i].version != _buffers[buffers[i].index].version)
            continue;
        _buffers[buffers[i].index].buffer->Release();
        _buffers[buffers[i].index].next_free = _first_free;
        _first_free = &_buffers[buffers[i].index];
    }
}
@ -1,148 +0,0 @@
|
|||||||
#include <d3d11.h>
|
|
||||||
#include <d3d11_1.h>
|
|
||||||
|
|
||||||
#include "gfx/renderer_api.h"
|
|
||||||
#include "runtime/config.h"
|
|
||||||
#include "runtime/handles.h"
|
|
||||||
#include "runtime/threading.h"
|
|
||||||
#include "runtime/threading_helpers.hpp"
|
|
||||||
|
|
||||||
#include "device_objects.hpp"
|
|
||||||
#include "gpu.hpp"
|
|
||||||
|
|
||||||
RT_CVAR_I(rt_Dx11MaxCommandBuffers,
|
|
||||||
"Maximum number of simultaneously created command buffers. Default: 1024",
|
|
||||||
1024);
|
|
||||||
|
|
||||||
static rt_command_buffer *_buffers;
|
|
||||||
static rt_command_buffer *_first_free;
|
|
||||||
static rt_mutex *_lock;
|
|
||||||
|
|
||||||
rt_result InitCommandBufferManagement() {
|
|
||||||
_buffers = reinterpret_cast<rt_command_buffer *>(
|
|
||||||
calloc((size_t)rt_Dx11MaxCommandBuffers.i, sizeof(rt_command_buffer)));
|
|
||||||
if (!_buffers)
|
|
||||||
return RT_OUT_OF_MEMORY;
|
|
||||||
_first_free = &_buffers[1];
|
|
||||||
|
|
||||||
_lock = rtCreateMutex();
|
|
||||||
if (!_lock) {
|
|
||||||
free(_buffers);
|
|
||||||
return RT_UNKNOWN_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i - 1; ++i) {
|
|
||||||
_buffers[i].next_free = &_buffers[i + 1];
|
|
||||||
}
|
|
||||||
return RT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ShutdownCommandBufferManagement() {
|
|
||||||
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i; ++i) {
|
|
||||||
if (_buffers[i].context)
|
|
||||||
_buffers[i].context->Release();
|
|
||||||
}
|
|
||||||
free(_buffers);
|
|
||||||
_buffers = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles) {
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
|
||||||
rtLockMutex(_lock);
|
|
||||||
rt_command_buffer *slot = _first_free;
|
|
||||||
if (slot)
|
|
||||||
_first_free = slot->next_free;
|
|
||||||
rtUnlockMutex(_lock);
|
|
||||||
|
|
||||||
if (!slot) {
|
|
||||||
rtLog("dx11", "Failed to allocate a command buffer slot.");
|
|
||||||
rtLockMutex(_lock);
|
|
||||||
for (uint32_t j = 0; j < i; ++j) {
|
|
||||||
rt_command_buffer *s = &_buffers[p_handles[j].index];
|
|
||||||
s->next_free = _first_free;
|
|
||||||
_first_free = s;
|
|
||||||
}
|
|
||||||
rtUnlockMutex(_lock);
|
|
||||||
return RT_OUT_OF_MEMORY;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!slot->context) {
|
|
||||||
if (FAILED(g_gpu.device->CreateDeferredContext1(0, &slot->context))) {
|
|
||||||
rtLog("dx11", "Failed to create a deferred context.");
|
|
||||||
auto lock_guard = rtAutoLock(_lock);
|
|
||||||
for (uint32_t j = 0; j < i; ++j) {
|
|
||||||
rt_command_buffer *s = &_buffers[p_handles[j].index];
|
|
||||||
s->next_free = _first_free;
|
|
||||||
_first_free = s;
|
|
||||||
}
|
|
||||||
return RT_UNKNOWN_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef RT_DEBUG
|
|
||||||
if (FAILED(slot->context->QueryInterface(IID_PPV_ARGS(&slot->annotation)))) {
|
|
||||||
rtLog("dx11", "Failed to retrieve the annotation interface.");
|
|
||||||
slot->annotation = nullptr;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
slot->context->ClearState();
|
|
||||||
}
|
|
||||||
|
|
||||||
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
|
|
||||||
const uint32_t index = (uint32_t)(slot - _buffers);
|
|
||||||
p_handles[i].version = slot->version;
|
|
||||||
p_handles[i].index = index;
|
|
||||||
}
|
|
||||||
|
|
||||||
return RT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles) {
|
|
||||||
// TODO: Handle semaphores
|
|
||||||
|
|
||||||
// Submit the command lists to the gpu
|
|
||||||
for (uint32_t i = 0; i < count; ++i) {
|
|
||||||
rt_command_buffer *cmdbuf = &_buffers[handles[i].index];
|
|
||||||
if (cmdbuf->version != handles[i].version) {
|
|
||||||
rtLog("dx11", "Tried to submit an invalid command buffer (version mismatch)");
|
|
||||||
return RT_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
ID3D11CommandList *cmdlist;
|
|
||||||
if (FAILED(cmdbuf->context->FinishCommandList(FALSE, &cmdlist))) {
|
|
||||||
rtLog("dx11", "FinishCommandList failed");
|
|
||||||
return RT_UNKNOWN_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
rtLockMutex(g_gpu.context_lock);
|
|
||||||
g_gpu.device_context->ExecuteCommandList(cmdlist, FALSE);
|
|
||||||
rtUnlockMutex(g_gpu.context_lock);
|
|
||||||
|
|
||||||
rtLockMutex(_lock);
|
|
||||||
cmdbuf->next_free = _first_free;
|
|
||||||
_first_free = cmdbuf;
|
|
||||||
rtUnlockMutex(_lock);
|
|
||||||
}
|
|
||||||
return RT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle) {
|
|
||||||
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxCommandBuffers.i)
|
|
||||||
return nullptr;
|
|
||||||
auto lg = rtAutoLock(_lock);
|
|
||||||
if (handle.version != _buffers[handle.index].version)
|
|
||||||
return nullptr;
|
|
||||||
return &_buffers[handle.index];
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" rt_result
|
|
||||||
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
|
||||||
const rt_alloc_command_buffer_info *,
|
|
||||||
rt_command_buffer_handle *p_command_buffers) {
|
|
||||||
return rtAllocCommandBuffers(count, p_command_buffers);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" rt_result
|
|
||||||
RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) {
|
|
||||||
return rtSubmitCommandBuffers(info->command_buffer_count, info->command_buffers);
|
|
||||||
}
|
|
@ -1,167 +0,0 @@
|
|||||||
#include <d3d11.h>
|
|
||||||
#include <d3d11_1.h>
|
|
||||||
|
|
||||||
#include "gfx/renderer_api.h"
|
|
||||||
#include "runtime/mem_arena.h"
|
|
||||||
|
|
||||||
#include "device_objects.hpp"
|
|
||||||
#include "gpu.hpp"
|
|
||||||
|
|
||||||
extern "C" void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdhandle,
|
|
||||||
const rt_cmd_begin_pass_info *info) {
|
|
||||||
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
|
|
||||||
if (!RT_VERIFY(cmd))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (cmd->annotation) {
|
|
||||||
WCHAR wname[128];
|
|
||||||
if (rtUTF8ToWStr(info->name, wname, sizeof(wname)) == RT_SUCCESS)
|
|
||||||
cmd->annotation->BeginEvent(wname);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Setup rtvs
|
|
||||||
ID3D11RenderTargetView *rtvs[4];
|
|
||||||
ID3D11DepthStencilView *dsv = nullptr;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
|
|
||||||
rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
|
|
||||||
if (!RT_VERIFY(rt))
|
|
||||||
return;
|
|
||||||
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
|
|
||||||
rtvs[i] = rt->rtv;
|
|
||||||
|
|
||||||
if (info->color_buffer_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
|
|
||||||
FLOAT color[4] = {
|
|
||||||
info->color_buffer_clear_values[i].color.r,
|
|
||||||
info->color_buffer_clear_values[i].color.g,
|
|
||||||
info->color_buffer_clear_values[i].color.b,
|
|
||||||
info->color_buffer_clear_values[i].color.a,
|
|
||||||
};
|
|
||||||
cmd->context->ClearRenderTargetView(rt->rtv, color);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_render_target *dsvrt = rtGetRenderTarget(info->depth_stencil_buffer);
|
|
||||||
if (dsvrt) {
|
|
||||||
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
|
|
||||||
"Need to provide a valid depth stencil render target");
|
|
||||||
dsv = dsvrt->dsv;
|
|
||||||
|
|
||||||
if (info->depth_stencil_buffer_load == RT_PASS_LOAD_MODE_CLEAR)
|
|
||||||
cmd->context->ClearDepthStencilView(
|
|
||||||
dsv,
|
|
||||||
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
|
|
||||||
: D3D11_CLEAR_DEPTH,
|
|
||||||
info->depth_stencil_buffer_clear_value.depth_stencil.depth,
|
|
||||||
static_cast<UINT8>(info->depth_stencil_buffer_clear_value.depth_stencil.stencil));
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd->context->OMSetRenderTargets(static_cast<UINT>(info->color_buffer_count), rtvs, dsv);
|
|
||||||
|
|
||||||
D3D11_VIEWPORT viewport;
|
|
||||||
viewport.TopLeftX = static_cast<float>(info->render_area.offset.x);
|
|
||||||
viewport.TopLeftY = static_cast<float>(info->render_area.offset.y);
|
|
||||||
viewport.Width = static_cast<float>(info->render_area.size.x);
|
|
||||||
viewport.Height = static_cast<float>(info->render_area.size.y);
|
|
||||||
viewport.MinDepth = 0.f;
|
|
||||||
viewport.MaxDepth = 1.f;
|
|
||||||
cmd->context->RSSetViewports(1, &viewport);
|
|
||||||
|
|
||||||
// We currently only support triangles, so here is a good place to set this
|
|
||||||
cmd->context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
||||||
}
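
/* Caller-side sketch for the pass setup above, using only fields this
 * function reads. Whether rt_cmd_begin_pass_info stores its buffer lists
 * inline or as pointers is not visible here, so treat the assignments as
 * schematic:
 *
 *     rt_cmd_begin_pass_info info = {};
 *     info.name                  = "forward";      // shows up in the annotation
 *     info.color_buffer_count    = 1;
 *     info.color_buffers[0]      = target;         // handle from the RT manager
 *     info.color_buffer_loads[0] = RT_PASS_LOAD_MODE_CLEAR;
 *     info.render_area.size.x    = 1280;
 *     info.render_area.size.y    = 720;
 *     RT_RENDERER_API_FN(CmdBeginPass)(cmdhandle, &info);
 */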

extern "C" void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdhandle) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    if (cmd->annotation) {
        cmd->annotation->EndEvent();
    }
}

extern "C" void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdhandle,
                                                              rt_render_target_handle target,
                                                              rt_render_target_state state) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    RT_UNUSED(target);
    RT_UNUSED(state);
}

extern "C" void
RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdhandle,
                                              rt_render_target_handle render_target) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    RT_UNUSED(render_target);
}

extern "C" void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
                                                    rt_pipeline_handle pipeline_handle) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    rt_pipeline *pipeline = rtGetPipeline(pipeline_handle);
    if (!RT_VERIFY(pipeline)) // the lookup fails for stale or invalid handles
        return;

    if (pipeline->IsComputePipeline()) {
        rtReportError("dx11",
                      "Attempted to bind a compute pipeline via CmdBindPipeline. Use "
                      "CmdBindComputePipeline instead.");
        return;
    }

    auto context = cmd->context;
    context->IASetInputLayout(pipeline->input_layout);
    context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    context->VSSetShader(pipeline->vertex_shader, nullptr, 0);
    context->PSSetShader(pipeline->pixel_shader, nullptr, 0);
    context->RSSetState(pipeline->rasterizer_state);
}

extern "C" void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
                                                         uint32_t first_binding,
                                                         uint32_t count,
                                                         const rt_buffer_handle *buffers,
                                                         const uint32_t *_strides,
                                                         const uint32_t *_offsets) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return;

    ID3D11Buffer **vbos = RT_ARENA_PUSH_ARRAY(temp.arena, ID3D11Buffer *, count);
    static_assert(sizeof(UINT) == sizeof(uint32_t));
    const UINT *offsets = _offsets;
    const UINT *strides = _strides;

    if (!vbos || !strides)
        goto out;

    if (!offsets) {
        // Default to zero offsets when the caller passes none.
        offsets = RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, UINT, count);
        if (!offsets)
            goto out;
    }

    for (uint32_t i = 0; i < count; ++i) {
        rt_buffer *buffer = rtGetBuffer(buffers[i]);
        RT_ASSERT(buffer->type == RT_BUFFER_TYPE_VERTEX, "Buffer must be a vertex buffer");
        vbos[i] = buffer->buffer;
    }

    cmd->context->IASetVertexBuffers(first_binding, count, vbos, strides, offsets);

out:
    rtReturnTemporaryArena(temp);
}

extern "C" void
RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, uint32_t first, uint32_t count) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
    if (!RT_VERIFY(cmd))
        return;
    cmd->context->Draw(count, first);
}
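
/* Taken together, recording a draw on one of these deferred contexts looks
 * like this (sketch; the handles are assumed valid, and the stride matches
 * the default POSITION/NORMAL/TANGENT/TEXCOORD layout from pipelines.cpp):
 *
 *     RT_RENDERER_API_FN(CmdBindPipeline)(cmdhandle, pipeline);
 *     uint32_t stride = sizeof(float) * 11, offset = 0;
 *     RT_RENDERER_API_FN(CmdBindVertexBuffers)(cmdhandle, 0, 1, &vbo, &stride, &offset);
 *     RT_RENDERER_API_FN(CmdDraw)(cmdhandle, 0, vertex_count);
 */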

@ -1,95 +0,0 @@
#ifndef RT_DX11_DEVICE_OBJECTS_HPP
#define RT_DX11_DEVICE_OBJECTS_HPP

// Types containing various api objects
#include <stdint.h>
#include <d3d11.h>
#include <d3d11_1.h>

#include "runtime/runtime.h"

struct rt_render_target {
    // Only one of these should be valid
    ID3D11RenderTargetView *rtv;
    ID3D11DepthStencilView *dsv;

    ID3D11Texture2D *texture;

    rt_pixel_format format;

    uint32_t version;
    rt_render_target *next_free;

    RT_INLINE bool HasStencilComponent() const {
        return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8;
    }

    RT_INLINE bool IsColorRenderTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
        return rtv != nullptr;
    }

    RT_INLINE bool IsDepthStencilTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
        return dsv != nullptr;
    }
};

struct rt_command_buffer {
    // Only created once and then re-used.
    ID3D11DeviceContext1 *context;
    ID3DUserDefinedAnnotation *annotation;

    uint32_t version;
    rt_command_buffer *next_free;
};

struct rt_buffer {
    ID3D11Buffer *buffer;
    rt_buffer_type type;
    rt_buffer_usage usage;

    uint32_t version;
    rt_buffer *next_free;
};

struct rt_pipeline {
    ID3D11InputLayout *input_layout;
    ID3D11VertexShader *vertex_shader;
    ID3D11PixelShader *pixel_shader;

    ID3D11ComputeShader *compute_shader;

    ID3D11RasterizerState *rasterizer_state;

    rt_pipeline *next_free;
    uint32_t version;

    RT_INLINE bool IsComputePipeline() const {
        RT_ASSERT(!(compute_shader && (vertex_shader || pixel_shader)),
                  "A pipeline should contain either a compute shader or graphics shaders.");
        return compute_shader != nullptr;
    }
};

struct rt_render_target_create_info {
    rt_pixel_format format;
    uint32_t width;
    uint32_t height;
    const char *name;
};

rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info);
void rtDestroyRenderTarget(rt_render_target_handle handle);

rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles);
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles);

rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle);
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);

#endif
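
/* All four pools above share the same versioned-handle scheme: a handle is a
 * {version, index} pair, each slot remembers the version it was last
 * allocated with, and a lookup only succeeds while the two still match. The
 * per-type accessors declared here all reduce to the following shape
 * (sketch):
 *
 *     template <typename T>
 *     static T *ResolveHandle(T *pool, int pool_size, uint32_t index, uint32_t version) {
 *         if (index >= (uint32_t)pool_size)
 *             return nullptr;
 *         return (pool[index].version == version) ? &pool[index] : nullptr;
 *     }
 */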

@ -1,43 +0,0 @@
#ifndef RT_DX11_GPU_HPP
#define RT_DX11_GPU_HPP

#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>

#include "runtime/threading.h"
#include "gfx/renderer_api.h"

#define RT_DX11_MAX_FRAMES_IN_FLIGHT 2

// Smart pointer for COM-Objects
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;

struct rt_swap_chain {
    ComPtr<IDXGISwapChain1> swap_chain;
    ComPtr<ID3D11RenderTargetView> rtv;
};

struct rt_gpu {
    ComPtr<ID3D11Device1> device;
    ComPtr<ID3D11DeviceContext1> device_context;
    ComPtr<IDXGIFactory2> dxgi_factory;

    rt_swap_chain swap_chain;

    rt_mutex *context_lock;

    D3D_FEATURE_LEVEL feature_level;
    D3D11_FEATURE_DATA_THREADING threading_support;
};

#ifndef DONT_DEFINE_GPU_GLOBAL
extern rt_gpu g_gpu;
#endif

DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format);

#endif

@ -1,35 +0,0 @@
#include "gpu.hpp"

DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format) {
    switch (format) {
    case RT_PIXEL_FORMAT_INVALID:
        return DXGI_FORMAT_UNKNOWN;
    case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
        return DXGI_FORMAT_R8G8B8A8_UNORM;
    case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
        return DXGI_FORMAT_B8G8R8A8_UNORM;
    case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
        return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
    case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
        return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
    case RT_PIXEL_FORMAT_R8G8B8_UNORM:
        return DXGI_FORMAT_R8G8B8A8_UNORM;
    case RT_PIXEL_FORMAT_B8G8R8_UNORM:
        return DXGI_FORMAT_B8G8R8X8_UNORM;
    case RT_PIXEL_FORMAT_R8G8B8_SRGB:
        return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
    case RT_PIXEL_FORMAT_B8G8R8_SRGB:
        return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB;

    case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
        return DXGI_FORMAT_D24_UNORM_S8_UINT;
    case RT_PIXEL_FORMAT_DEPTH32:
        return DXGI_FORMAT_D32_FLOAT;

    case RT_PIXEL_FORMAT_SWAPCHAIN:
        return DXGI_FORMAT_B8G8R8A8_UNORM;

    default:
        return DXGI_FORMAT_UNKNOWN;
    }
}
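
/* DXGI has no 24-bit color formats, which is why the R8G8B8/B8G8R8 cases
 * above widen to the nearest 32-bit layout (RGBA or BGRX). The enum mapping
 * is therefore not a bijection:
 *
 *     DXGI_FORMAT f = rtConvertPixelFormat(RT_PIXEL_FORMAT_R8G8B8_UNORM);
 *     // f == DXGI_FORMAT_R8G8B8A8_UNORM, same as for R8G8B8A8_UNORM
 */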

@ -1,288 +0,0 @@
#ifndef _WIN32
#warning Building DX11 on non-Windows is probably a mistake
#endif

#include <d3d11.h>
#include <dxgi1_3.h>
#include <wrl.h>
#include <string.h> // strncmp

#include "gfx/renderer_api.h"
#include "runtime/config.h"

#define DONT_DEFINE_GPU_GLOBAL // g_gpu is defined in this translation unit
#include "gpu.hpp"

RT_CVAR_S(
    rt_Dx11AdapterName,
    "Name of the adapter that should be used for device creation. Default: \"\" (Use default)",
    "");
RT_CVAR_I(rt_Dx11VSync, "Enable vsync. Default: 1", 1);
RT_CVAR_I(rt_Dx11MaxSubmittedCommandBuffers,
          "Maximum number of submitted command buffers per frame. Default: 1024",
          1024);

extern rt_cvar rt_Dx11MaxCommandBuffers;

rt_gpu g_gpu;

extern "C" void RT_RENDERER_API_FN(RegisterCVars)(void) {
    rtRegisterCVAR(&rt_Dx11AdapterName);
    rtRegisterCVAR(&rt_Dx11VSync);
    rtRegisterCVAR(&rt_Dx11MaxCommandBuffers);
}

static rt_swap_chain CreateSwapChain(HWND hwnd) {
    rt_swap_chain swc;

    DXGI_SWAP_CHAIN_DESC1 desc;
    desc.Width = 0;  // use window width
    desc.Height = 0; // use window height
    desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; // can't specify _SRGB here when using
                                              // DXGI_SWAP_EFFECT_FLIP_*
    desc.Stereo = FALSE;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    desc.BufferCount = 2;
    desc.Scaling = DXGI_SCALING_STRETCH;
    desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
    desc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
    desc.Flags = 0;

    if (FAILED(g_gpu.dxgi_factory->CreateSwapChainForHwnd(g_gpu.device.Get(),
                                                          hwnd,
                                                          &desc,
                                                          nullptr,
                                                          nullptr,
                                                          &swc.swap_chain))) {
        rtReportError("dx11", "Failed to create the swap chain.");
        return swc;
    }

    ID3D11Texture2D *frame_buffer;
    if (FAILED(swc.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
        rtReportError("dx11", "Failed to retrieve the backbuffer.");
        swc.swap_chain.Reset();
        return swc;
    }

    D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
    rtv_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
    rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;

    if (FAILED(g_gpu.device->CreateRenderTargetView(frame_buffer, &rtv_desc, &swc.rtv))) {
        rtReportError("dx11", "Failed to create the render target view for the backbuffer.");
        frame_buffer->Release();
        swc.swap_chain.Reset();
        return swc;
    }
    frame_buffer->Release(); // GetBuffer added a reference; the rtv holds its own

    return swc;
}
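
/* The UNORM-buffer / sRGB-view split above is the usual workaround for
 * flip-model swap chains: DXGI rejects *_SRGB formats in the swap chain
 * description when a DXGI_SWAP_EFFECT_FLIP_* effect is used, but a
 * DXGI_FORMAT_B8G8R8A8_UNORM_SRGB render target view over the UNORM buffer
 * still performs the linear-to-sRGB conversion on write. */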

static IDXGIAdapter *RetrieveSelectedAdapter(void) {
    ComPtr<IDXGIFactory2> factory;
    if (FAILED(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)))) {
        return NULL;
    }

    UINT i = 0;
    IDXGIAdapter *adapter;
    while (factory->EnumAdapters(i, &adapter) == S_OK) {
        ++i;

        DXGI_ADAPTER_DESC desc;
        adapter->GetDesc(&desc);

        char utf8_desc[256];
        rtWStrToUTF8(desc.Description, utf8_desc, 256);

        if (strncmp(utf8_desc, rt_Dx11AdapterName.s, 256) == 0)
            return adapter;

        adapter->Release(); // not the adapter we are looking for
    }
    return NULL;
}

extern rt_result InitCommandBufferManagement();
extern void ShutdownCommandBufferManagement();
extern rt_result InitRenderTargetManagement();
extern void ShutdownRenderTargetManagement();
extern rt_result InitBufferManagement();
extern void ShutdownBufferManagement();
extern rt_result InitPipelineManagement();
extern void ShutdownPipelineManagement();

extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
    constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0};
    UINT device_flags = 0;
#ifdef RT_DEBUG
    device_flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    IDXGIAdapter *selected_adapter = RetrieveSelectedAdapter();
    // D3D11CreateDevice requires D3D_DRIVER_TYPE_UNKNOWN when an explicit
    // adapter is passed.
    D3D_DRIVER_TYPE driver_type =
        selected_adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE;

    ID3D11Device *base_device;
    ID3D11DeviceContext *base_context;
    if (FAILED(D3D11CreateDevice(selected_adapter,
                                 driver_type,
                                 nullptr,
                                 device_flags,
                                 feature_levels,
                                 RT_ARRAY_COUNT(feature_levels),
                                 D3D11_SDK_VERSION,
                                 &base_device,
                                 &g_gpu.feature_level,
                                 &base_context))) {
        rtLog("dx11", "Feature level 11.1 creation failed, retrying with feature level 11.0");
        if (FAILED(D3D11CreateDevice(selected_adapter,
                                     driver_type,
                                     nullptr,
                                     device_flags,
                                     &feature_levels[1],
                                     RT_ARRAY_COUNT(feature_levels) - 1,
                                     D3D11_SDK_VERSION,
                                     &base_device,
                                     &g_gpu.feature_level,
                                     &base_context))) {
            rtReportError("dx11", "Failed to create the d3d11 device.");
            return RT_UNKNOWN_ERROR;
        }
    }

    if (FAILED(base_device->QueryInterface(IID_PPV_ARGS(&g_gpu.device)))) {
        rtReportError("dx11", "Failed to query the D3D11Device1 interface.");
        return RT_UNKNOWN_ERROR;
    }
    if (FAILED(base_context->QueryInterface(IID_PPV_ARGS(&g_gpu.device_context)))) {
        rtReportError("dx11", "Failed to query the D3D11DeviceContext1 interface.");
        return RT_UNKNOWN_ERROR;
    }

    IDXGIDevice1 *dxgi_device;
    if (FAILED(g_gpu.device->QueryInterface(&dxgi_device))) {
        rtReportError("dx11", "Failed to query the DXGIDevice1 interface.");
        return RT_UNKNOWN_ERROR;
    }
    IDXGIAdapter *adapter;
    if (FAILED(dxgi_device->GetAdapter(&adapter))) {
        rtReportError("dx11", "Failed to retrieve the dxgi adapter.");
        return RT_UNKNOWN_ERROR;
    }
    if (FAILED(adapter->GetParent(IID_PPV_ARGS(&g_gpu.dxgi_factory)))) {
        rtReportError("dx11", "Failed to retrieve the dxgi factory.");
        return RT_UNKNOWN_ERROR;
    }

    g_gpu.device->CheckFeatureSupport(D3D11_FEATURE_THREADING,
                                      &g_gpu.threading_support,
                                      sizeof(g_gpu.threading_support));

    g_gpu.swap_chain = CreateSwapChain(info->hWnd);

    g_gpu.context_lock = rtCreateMutex();

    rt_result res = InitCommandBufferManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitRenderTargetManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitBufferManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitPipelineManagement();
    if (res != RT_SUCCESS)
        return res;

    return RT_SUCCESS;
}

extern "C" void RT_RENDERER_API_FN(Shutdown)(void) {
    ShutdownPipelineManagement();
    ShutdownBufferManagement();
    ShutdownRenderTargetManagement();
    ShutdownCommandBufferManagement();
    rtDestroyMutex(g_gpu.context_lock);
    g_gpu.swap_chain.rtv.Reset();
    g_gpu.swap_chain.swap_chain.Reset();
    g_gpu.dxgi_factory.Reset();
    g_gpu.device_context.Reset(); // release the immediate context before the device
    g_gpu.device.Reset();
}

extern "C" unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
    return RT_DX11_MAX_FRAMES_IN_FLIGHT;
}

extern "C" void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
    RT_UNUSED(frame_id);
    FLOAT clear_color[4] = {0, 0, 0, 0};
    rtLockMutex(g_gpu.context_lock);
    g_gpu.device_context->ClearRenderTargetView(g_gpu.swap_chain.rtv.Get(), clear_color);
    rtUnlockMutex(g_gpu.context_lock);
}

extern "C" void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
    RT_UNUSED(frame_id);

    rtLockMutex(g_gpu.context_lock);
    UINT sync_interval = rt_Dx11VSync.i ? 1 : 0;
    g_gpu.swap_chain.swap_chain->Present(sync_interval, 0);
    rtUnlockMutex(g_gpu.context_lock);
}

// Copied from null. Delete once no longer needed

extern "C" {

#define RETURN_HANDLE_STUB2(type, initial)                      \
    static unsigned int s_next = (initial);                     \
    s_next = (s_next + 1) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
    type h = {                                                  \
        1,                                                      \
        s_next,                                                 \
    };                                                          \
    return h;

#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)

#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial)                    \
    static unsigned int s_next = (initial);                               \
    for (uint32_t i = 0; i < (count); ++i) {                              \
        (out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
        (out)[i].version = 1;                                             \
    }

#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)
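
/* Worked expansion of the array stub, e.g. RETURN_HANDLE_ARRAY_STUB2(out, 2, 3)
 * behaves like:
 *
 *     static unsigned int s_next = 3;
 *     out[0].index = 3; out[0].version = 1;
 *     out[1].index = 4; out[1].version = 1;
 *
 * i.e. the stubs hand out unique, always-"valid" handles with no backing
 * object, which is enough for callers that only pass handles through. */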

rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
                                               const rt_gpu_semaphore_info *info,
                                               rt_gpu_semaphore_handle *p_semaphores) {
    RT_UNUSED(info);
    RETURN_HANDLE_ARRAY_STUB2(p_semaphores, count, 3)
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
    RT_UNUSED(count);
    RT_UNUSED(semaphores);
}

/* NOTE(Kevin): It might become necessary to actually track the value, to correctly simulate gpu
 * behaviour */
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle sem) {
    RT_UNUSED(sem);
    return 0;
}

rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
    return {1, 1};
}

rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
    return {1, 2};
}
}

@ -1,31 +0,0 @@
if get_option('build_dx11')
    dx11_dep = declare_dependency(link_args: ['-ld3d11', '-ldxgi', '-lwinmm', '-ldxguid'])

    dx11_renderer_lib = library('rtdx11',
        # Project Sources
        'device_objects.hpp',
        'gpu.hpp',

        '../common/common_render_graph.h',

        'buffers.cpp',
        'commands.cpp',
        'command_buffers.cpp',
        'helpers.cpp',
        'init.cpp',
        'pipelines.cpp',
        'render_graph.cpp',
        'render_targets.cpp',

        '../common/common_render_graph.c',

        dependencies : [m_dep, windowing_dep, dx11_dep],
        include_directories : [engine_incdir, contrib_incdir],
        link_with : [runtime_lib],
        cpp_pch : 'pch/dx11_pch.h',
        override_options : ['b_sanitize=none'],
        install : true)

    engine_libs += dx11_renderer_lib
    engine_lib_paths += dx11_renderer_lib.full_path()
endif

@ -1,5 +0,0 @@
// DX11 headers
#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>

@ -1,238 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>

#include "gfx/effect.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/threading_helpers.hpp"

#include "device_objects.hpp"
#include "gpu.hpp"

RT_CVAR_I(rt_Dx11MaxPipelines,
          "Maximum number of simultaneously existing pipelines. Default: 128",
          128);

static rt_pipeline *_pipelines;
static rt_pipeline *_first_free;
static rt_mutex *_lock;

rt_result InitPipelineManagement() {
    _pipelines =
        reinterpret_cast<rt_pipeline *>(calloc((size_t)rt_Dx11MaxPipelines.i, sizeof(rt_pipeline)));
    if (!_pipelines)
        return RT_OUT_OF_MEMORY;

    // Slot 0 stays unused so that index 0 can act as an invalid handle.
    _first_free = _pipelines + 1;
    for (int i = 0; i < rt_Dx11MaxPipelines.i - 1; ++i)
        _pipelines[i].next_free = &_pipelines[i + 1];

    _lock = rtCreateMutex();
    if (!_lock) {
        free(_pipelines);
        return RT_UNKNOWN_ERROR;
    }

    return RT_SUCCESS;
}

void ShutdownPipelineManagement() {
    for (int i = 0; i < rt_Dx11MaxPipelines.i; ++i) {
        if (_pipelines[i].compute_shader)
            _pipelines[i].compute_shader->Release();
        if (_pipelines[i].vertex_shader)
            _pipelines[i].vertex_shader->Release();
        if (_pipelines[i].pixel_shader)
            _pipelines[i].pixel_shader->Release();
        if (_pipelines[i].input_layout)
            _pipelines[i].input_layout->Release();
        if (_pipelines[i].rasterizer_state)
            _pipelines[i].rasterizer_state->Release();
    }
    free(_pipelines);
    rtDestroyMutex(_lock);
}

rt_result GetShader(rt_resource_id id, rt_shader_info **p_shader, rt_arena *arena) {
    size_t shader_size = rtGetResourceSize(id);
    if (shader_size == 0)
        return RT_INVALID_VALUE;
    void *buffer = rtArenaPush(arena, shader_size);
    if (!buffer)
        return RT_OUT_OF_MEMORY;
    rt_result res = rtGetResource(id, buffer);
    if (res != RT_SUCCESS) {
        rtArenaPop(arena, shader_size);
        return res;
    }

    rt_resource *resource = reinterpret_cast<rt_resource *>(buffer);
    RT_ASSERT(resource->type == RT_RESOURCE_SHADER, "Expected a shader");
    *p_shader = reinterpret_cast<rt_shader_info *>(resource->data);
    return RT_SUCCESS;
}
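
/* GetShader copies the whole resource blob into the caller's arena and then
 * aliases rt_shader_info inside that copy, so the out-pointer lives exactly
 * as long as the arena. The expected call pattern (as used below) is:
 *
 *     rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
 *     rt_shader_info *shader;
 *     if (GetShader(id, &shader, temp.arena) == RT_SUCCESS) {
 *         void *bytecode = rtResolveRelptr(&shader->bytecode);
 *         // ... create the D3D11 shader object from bytecode ...
 *     }
 *     rtReturnTemporaryArena(temp);
 */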

extern "C" rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
    rt_pipeline *slot = nullptr;
    {
        auto lg = rtAutoLock(_lock);

        slot = _first_free;
        if (slot)
            _first_free = slot->next_free;
    }
    if (!slot) {
        rtLog("dx11", "Could not create pipeline, because no slots are available.");
        return RT_INVALID_HANDLE;
    }

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        auto lg = rtAutoLock(_lock);
        slot->next_free = _first_free;
        _first_free = slot;
        return RT_INVALID_HANDLE;
    }

    if (info->vertex_shader != RT_INVALID_RESOURCE_ID) {
        rt_shader_info *vs;
        if (GetShader(info->vertex_shader, &vs, temp.arena) != RT_SUCCESS) {
            rtReportError("dx11", "Could not retrieve vertex shader data.");
            rtReturnTemporaryArena(temp); // the arena must be returned on every path
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        void *bytecode = rtResolveRelptr(&vs->bytecode);
        if (FAILED(g_gpu.device->CreateVertexShader(bytecode,
                                                    vs->bytecode_length,
                                                    NULL,
                                                    &slot->vertex_shader))) {
            rtReportError("dx11", "Vertex shader creation failed");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        // TODO: effects should specify the expected vertex layout
        // For now, we use a default
        /* clang-format off */
        D3D11_INPUT_ELEMENT_DESC default_layout[] = {
            {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
            {"NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
            {"TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
            {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
        };
        /* clang-format on */
        if (FAILED(g_gpu.device->CreateInputLayout(default_layout,
                                                   RT_ARRAY_COUNT(default_layout),
                                                   bytecode,
                                                   vs->bytecode_length,
                                                   &slot->input_layout))) {
            rtReportError("dx11", "Failed to create the vertex layout.");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }
    }

    if (info->fragment_shader != RT_INVALID_RESOURCE_ID) {
        rt_shader_info *fs;
        if (GetShader(info->fragment_shader, &fs, temp.arena) != RT_SUCCESS) {
            rtReportError("dx11", "Could not retrieve fragment shader data.");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        void *bytecode = rtResolveRelptr(&fs->bytecode);
        if (FAILED(g_gpu.device->CreatePixelShader(bytecode,
                                                   fs->bytecode_length,
                                                   NULL,
                                                   &slot->pixel_shader))) {
            rtReportError("dx11", "Fragment shader creation failed");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }
    }

    if (info->compute_shader != RT_INVALID_RESOURCE_ID) {
        rt_shader_info *cs;
        if (GetShader(info->compute_shader, &cs, temp.arena) != RT_SUCCESS) {
            rtReportError("dx11", "Could not retrieve compute shader data.");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        void *bytecode = rtResolveRelptr(&cs->bytecode);
        if (FAILED(g_gpu.device->CreateComputeShader(bytecode,
                                                     cs->bytecode_length,
                                                     NULL,
                                                     &slot->compute_shader))) {
            rtReportError("dx11", "Compute shader creation failed");
            rtReturnTemporaryArena(temp);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }
    }

    // TODO: Effects should specify the rasterizer state
    // DX11 only supports up to 4096 rasterizer state objects.
    // We could cache these and only create the distinct objects.
    D3D11_RASTERIZER_DESC rasterizer_desc;
    rasterizer_desc.FillMode = D3D11_FILL_SOLID;
    rasterizer_desc.CullMode = D3D11_CULL_NONE;
    rasterizer_desc.FrontCounterClockwise = TRUE;
    rasterizer_desc.DepthBias = 0;
    rasterizer_desc.DepthBiasClamp = 0.f;
    rasterizer_desc.SlopeScaledDepthBias = 0.f;
    rasterizer_desc.DepthClipEnable = TRUE;
    rasterizer_desc.ScissorEnable = FALSE;
    rasterizer_desc.MultisampleEnable = TRUE;
    rasterizer_desc.AntialiasedLineEnable = TRUE;
    if (FAILED(g_gpu.device->CreateRasterizerState(&rasterizer_desc, &slot->rasterizer_state))) {
        rtReportError("dx11", "Rasterizer state creation failed");
        rtReturnTemporaryArena(temp);
        auto lg = rtAutoLock(_lock);
        slot->next_free = _first_free;
        _first_free = slot;
        return RT_INVALID_HANDLE;
    }
    rtReturnTemporaryArena(temp);

    slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
    uint32_t index = static_cast<uint32_t>(slot - _pipelines);
    return {slot->version, index};
}
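
/* One possible shape for the rasterizer-state cache mentioned in the TODO
 * above (a sketch, not part of this backend; rtHashBytes, LookupState and
 * InsertState are hypothetical helpers): D3D11 caps live rasterizer-state
 * objects at 4096, so distinct descs should be created once and shared.
 *
 *     ID3D11RasterizerState *GetOrCreateRasterizerState(const D3D11_RASTERIZER_DESC *desc) {
 *         uint64_t key = rtHashBytes(desc, sizeof(*desc)); // hypothetical hash helper
 *         ID3D11RasterizerState *cached = LookupState(key); // hypothetical cache probe
 *         if (cached)
 *             return cached;
 *         ID3D11RasterizerState *state = nullptr;
 *         if (SUCCEEDED(g_gpu.device->CreateRasterizerState(desc, &state)))
 *             InsertState(key, state); // hypothetical cache insert
 *         return state;
 *     }
 */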

extern "C" void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
    if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
        return;
    auto lg = rtAutoLock(_lock);
    if (handle.version != _pipelines[handle.index].version)
        return;

    if (_pipelines[handle.index].compute_shader)
        _pipelines[handle.index].compute_shader->Release();
    if (_pipelines[handle.index].vertex_shader)
        _pipelines[handle.index].vertex_shader->Release();
    if (_pipelines[handle.index].pixel_shader)
        _pipelines[handle.index].pixel_shader->Release();
    if (_pipelines[handle.index].input_layout)
        _pipelines[handle.index].input_layout->Release();
    if (_pipelines[handle.index].rasterizer_state)
        _pipelines[handle.index].rasterizer_state->Release();
    // Null the released interfaces so a later shutdown does not release them twice.
    _pipelines[handle.index].compute_shader = nullptr;
    _pipelines[handle.index].vertex_shader = nullptr;
    _pipelines[handle.index].pixel_shader = nullptr;
    _pipelines[handle.index].input_layout = nullptr;
    _pipelines[handle.index].rasterizer_state = nullptr;

    _pipelines[handle.index].next_free = _first_free;
    _pipelines[handle.index].version =
        (_pipelines[handle.index].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
    _first_free = &_pipelines[handle.index];
}

rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
    if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
        return nullptr;
    auto lg = rtAutoLock(_lock);
    if (handle.version != _pipelines[handle.index].version)
        return nullptr;
    return &_pipelines[handle.index];
}

@ -1,229 +0,0 @@
#include <string.h> // memset

#include "gfx/render_view.h"
#include "gfx/renderer_api.h"
#include "renderer/common/common_render_graph.h"

#include "device_objects.hpp"
#include "gpu.hpp"

static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;

struct rt_pass_runtime_data {
    rt_render_view views[RT_DX11_MAX_FRAMES_IN_FLIGHT][MAX_SUBMITTED_VIEWS_PER_PASS];
    uint32_t view_count[RT_DX11_MAX_FRAMES_IN_FLIGHT];
    unsigned int views_frame_id[RT_DX11_MAX_FRAMES_IN_FLIGHT];
};

static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
    return rtCreateRenderTarget({.format = rtinfo->format,
                                 .width = rtinfo->width,
                                 .height = rtinfo->height,
                                 .name = rtinfo->name});
}

static int RequireExplicitSynchronization() {
    return 0;
}

static size_t GetRuntimeDataSize() {
    return sizeof(rt_pass_runtime_data);
}

extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
    rt_render_graph_builder_platform_callbacks cbs{};
    cbs.CreateRenderTarget = CreateRenderTarget;
    cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
    cbs.GetRuntimeDataSize = GetRuntimeDataSize;
    return rtCreateRenderGraphBuilder(&cbs);
}

extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
    rtDestroyRenderGraphBuilder(builder);
}

extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                                     uint32_t pass_id,
                                                     rt_render_view view,
                                                     unsigned int frame_id) {
    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
        if (render_graph->passes[i].id == pass_id) {
            rt_render_pass *pass = &render_graph->passes[i];
            rt_pass_runtime_data *runtime_data =
                reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
            unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
            RT_ASSERT(runtime_data->views_frame_id[frame_slot] == frame_id ||
                          runtime_data->views_frame_id[frame_slot] == 0,
                      "Tried to submit a view for a not-current frame.");
            if (!RT_VERIFY(runtime_data->view_count[frame_slot] < MAX_SUBMITTED_VIEWS_PER_PASS))
                return;
            runtime_data->views[frame_slot][runtime_data->view_count[frame_slot]++] = view;
            runtime_data->views_frame_id[frame_slot] = frame_id;
        }
    }
}

extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph,
                                                     unsigned int frame_id) {
    unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
    for (uint32_t i = 0; i < graph->pass_count; ++i) {
        rt_pass_runtime_data *runtime_data =
            reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
#ifdef RT_DEBUG
        memset(runtime_data->views[frame_slot], 0, sizeof(runtime_data->views[frame_slot]));
#endif
        runtime_data->view_count[frame_slot] = 0;
        runtime_data->views_frame_id[frame_slot] = 0;
    }
}

static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle, unsigned int frame_id) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
    if (!RT_VERIFY(cmd))
        return RT_INVALID_VALUE;

    if (cmd->annotation) {
        WCHAR wname[128];
        if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS)
            cmd->annotation->BeginEvent(wname);
    }

    // Setup rtvs
    ID3D11RenderTargetView *rtvs[4];
    ID3D11DepthStencilView *dsv = nullptr;
    RT_ASSERT(pass->color_output_count <= RT_ARRAY_COUNT(rtvs),
              "At most 4 color outputs are supported");

    for (uint32_t i = 0; i < pass->color_output_count; ++i) {
        rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
        if (!RT_VERIFY(rt))
            return RT_INVALID_VALUE;
        RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
        rtvs[i] = rt->rtv;

        if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
            FLOAT color[4] = {
                pass->color_clear_values[i].r,
                pass->color_clear_values[i].g,
                pass->color_clear_values[i].b,
                pass->color_clear_values[i].a,
            };
            cmd->context->ClearRenderTargetView(rt->rtv, color);
        }
    }

    rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
    if (dsvrt) {
        RT_ASSERT(dsvrt->IsDepthStencilTarget(),
                  "Need to provide a valid depth stencil render target");
        dsv = dsvrt->dsv;

        if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
            cmd->context->ClearDepthStencilView(
                dsv,
                (dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
                                               : D3D11_CLEAR_DEPTH,
                pass->depth_stencil_clear_value.depth,
                static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
    }

    cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);

    D3D11_VIEWPORT viewport;
    viewport.TopLeftX = static_cast<float>(pass->render_area.offset.x);
    viewport.TopLeftY = static_cast<float>(pass->render_area.offset.y);
    viewport.Width = static_cast<float>(pass->render_area.size.x);
    viewport.Height = static_cast<float>(pass->render_area.size.y);
    viewport.MinDepth = pass->min_depth;
    viewport.MaxDepth = pass->max_depth;

    if (viewport.Width == 0 || viewport.Height == 0) {
        // Fall back to the swap chain dimensions
        DXGI_SWAP_CHAIN_DESC desc;
        g_gpu.swap_chain.swap_chain->GetDesc(&desc);
        if (viewport.Width == 0)
            viewport.Width = static_cast<float>(desc.BufferDesc.Width);
        if (viewport.Height == 0)
            viewport.Height = static_cast<float>(desc.BufferDesc.Height);
    }

    cmd->context->RSSetViewports(1, &viewport);

    auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
    RT_VERIFY(runtime_data);

    unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
    rt_result res = RT_VERIFY(pass->Execute)(pass->id,
                                             cmdbuf_handle,
                                             runtime_data->views[frame_slot],
                                             runtime_data->view_count[frame_slot],
                                             pass->user_data);

    if (cmd->annotation) {
        cmd->annotation->EndEvent();
    }

    return res;
}

static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
    DXGI_SWAP_CHAIN_DESC scd;
    g_gpu.swap_chain.swap_chain->GetDesc(&scd);

    D3D11_TEXTURE2D_DESC td;
    backbuffer->texture->GetDesc(&td);

    // This is more strict than necessary, because the formats could also be from the same group
    return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
           scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
}
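
/* CopyResource only requires identical dimensions, matching sample counts and
 * compatible formats; comparing the raw format enums as done above is
 * deliberately stricter, trading a few unnecessary blit fallbacks for a much
 * simpler check. Relaxing it would mean comparing DXGI format families
 * (members of the same typeless group) instead of exact values. */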

extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph, unsigned int frame_id) {
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    // Alloc a command buffer for every pass
    rt_command_buffer_handle *cmdbufs =
        RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
    if (!cmdbufs) {
        rtReturnTemporaryArena(temp);
        return RT_OUT_OF_MEMORY;
    }
    rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
    if (res != RT_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return res;
    }

    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
        rt_render_pass *pass = &render_graph->passes[i];

        res = ExecutePass(pass, cmdbufs[i], frame_id);
        if (res != RT_SUCCESS)
            break;
    }

    if (res == RT_SUCCESS) {
        res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
    }

    // Copy backbuffer to swapchain
    rt_render_target *backbuffer =
        rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
    if (!backbuffer) {
        rtReturnTemporaryArena(temp);
        return RT_INVALID_VALUE;
    }

    ID3D11Texture2D *frame_buffer;
    if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
        rtReportError("dx11", "Failed to retrieve the backbuffer.");
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    if (IsCopyResourcePossible(backbuffer)) {
        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
    } else {
        // NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
        // that implements a blit.
        // Another idea would be a compute shader that does a copy&filter but that requires more
        // work
        RT_NOT_IMPLEMENTED;
    }
    frame_buffer->Release(); // GetBuffer adds a reference on every call

    rtReturnTemporaryArena(temp);
    return res;
}

@ -1,182 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>

#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading_helpers.hpp"

#include "device_objects.hpp"
#include "gpu.hpp"

RT_CVAR_I(rt_Dx11MaxRenderTargets,
          "Maximum number of simultaneously existing render targets. Default: 128",
          128);

static rt_render_target *_render_targets;
static rt_render_target *_first_free;
static rt_mutex *_lock;

rt_result InitRenderTargetManagement() {
    _render_targets = reinterpret_cast<rt_render_target *>(
        calloc((size_t)rt_Dx11MaxRenderTargets.i, sizeof(rt_render_target)));
    if (!_render_targets) {
        return RT_OUT_OF_MEMORY;
    }

    _lock = rtCreateMutex();
    if (!_lock) {
        free(_render_targets);
        return RT_UNKNOWN_ERROR;
    }

    // Slot 1 permanently holds the swap chain backbuffer; slot 0 stays unused
    // so that index 0 can act as an invalid handle.
    _render_targets[1].rtv = g_gpu.swap_chain.rtv.Get();
    _render_targets[1].format = RT_PIXEL_FORMAT_B8G8R8A8_SRGB;
    _render_targets[1].version = 1;

    _first_free = _render_targets + 2;
    // The last slot keeps the NULL next_free from calloc, terminating the list.
    for (int i = 2; i < rt_Dx11MaxRenderTargets.i - 1; ++i) {
        _render_targets[i].next_free = &_render_targets[i + 1];
    }
    return RT_SUCCESS;
}
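
/* Slot layout after initialization:
 *
 *     _render_targets[0]    reserved, so index 0 can act as an invalid handle
 *     _render_targets[1]    the swap chain backbuffer (rtv owned elsewhere)
 *     _render_targets[2..]  free list, linked in order and NULL-terminated
 */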

void ShutdownRenderTargetManagement() {
    // Swapchain rtv in slot 1 will be released elsewhere
    for (int i = 2; i < rt_Dx11MaxRenderTargets.i; ++i) {
        if (_render_targets[i].rtv)
            _render_targets[i].rtv->Release();
        if (_render_targets[i].dsv)
            _render_targets[i].dsv->Release();
        if (_render_targets[i].texture)
            _render_targets[i].texture->Release();
    }
    free(_render_targets);
    rtDestroyMutex(_lock);
}

rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info) {
    rt_render_target *slot = nullptr;
    {
        auto lock_guard = rtAutoLock(_lock);
        slot = _first_free;
        if (slot) // guard against an exhausted free list
            _first_free = slot->next_free;
    }

    if (!slot) {
        rtLog("dx11",
              "Could not create a new render target, because all available slots are currently in "
              "use.");
        return RT_INVALID_HANDLE;
    }

    slot->format = info.format;

    uint32_t swapchain_width = 0, swapchain_height = 0;
    if (info.width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
        info.height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {

        DXGI_SWAP_CHAIN_DESC desc;
        g_gpu.swap_chain.swap_chain->GetDesc(&desc);
        swapchain_width = desc.BufferDesc.Width;
        swapchain_height = desc.BufferDesc.Height;
    }

    if (!rtIsDepthFormat(info.format)) {
        D3D11_TEXTURE2D_DESC tex_desc = {};
        tex_desc.Width =
            (info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
        tex_desc.Height =
            (info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
        tex_desc.MipLevels = 1;
        tex_desc.ArraySize = 1;
        tex_desc.Format = rtConvertPixelFormat(info.format);
        tex_desc.SampleDesc.Count = 1;
        tex_desc.SampleDesc.Quality = 0;
        tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
        tex_desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
        tex_desc.CPUAccessFlags = 0; // none
        tex_desc.MiscFlags = 0;
        if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
            rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
        rtv_desc.Format = rtConvertPixelFormat(info.format);
        rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
        rtv_desc.Texture2D.MipSlice = 0;
        if (FAILED(g_gpu.device->CreateRenderTargetView(slot->texture, &rtv_desc, &slot->rtv))) {
            slot->texture->Release();
            rtLog("dx11",
                  "Failed to create the render target view for render target %s",
                  info.name);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        uint32_t index = static_cast<uint32_t>(slot - _render_targets);
        return {.version = slot->version, .index = index};
    } else {
        D3D11_TEXTURE2D_DESC tex_desc = {};
        tex_desc.Width =
            (info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
        tex_desc.Height =
            (info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
        tex_desc.MipLevels = 1;
        tex_desc.ArraySize = 1;
        tex_desc.Format = rtConvertPixelFormat(info.format);
        tex_desc.SampleDesc.Count = 1;
        tex_desc.SampleDesc.Quality = 0;
        tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
        tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
        tex_desc.CPUAccessFlags = 0; // none
        tex_desc.MiscFlags = 0;
        if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
            rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
        dsv_desc.Format = rtConvertPixelFormat(info.format);
        dsv_desc.Flags = 0;
        dsv_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
        dsv_desc.Texture2D.MipSlice = 0;
        if (FAILED(g_gpu.device->CreateDepthStencilView(slot->texture, &dsv_desc, &slot->dsv))) {
            slot->texture->Release();
            rtLog("dx11",
                  "Failed to create the depth stencil view for render target %s",
                  info.name);
            auto lg = rtAutoLock(_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            return RT_INVALID_HANDLE;
        }

        slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        uint32_t index = static_cast<uint32_t>(slot - _render_targets);
        return {.version = slot->version, .index = index};
    }
}

void rtDestroyRenderTarget(rt_render_target_handle handle) {
    RT_UNUSED(handle);
}

rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
    if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxRenderTargets.i)
        return nullptr;
    auto lg = rtAutoLock(_lock);
    if (_render_targets[handle.index].version != handle.version)
        return nullptr;
    return &_render_targets[handle.index];
}

@ -1,10 +0,0 @@
null_renderer_lib = library('rtnull',
    'null.c',
    '../common/common_render_graph.c',
    include_directories : engine_incdir,
    link_with : runtime_lib,
    install : true)

engine_libs += null_renderer_lib
engine_lib_paths += null_renderer_lib.full_path()
@ -1,159 +0,0 @@
/* "Null" renderer implementation.
 * Useful for headless testing */

#include "gfx/renderer_api.h"
#include "runtime/runtime.h"

#include "../common/common_render_graph.h"

#define RETURN_HANDLE_STUB2(type, initial)                                          \
    static unsigned int s_next = (initial);                                         \
    return (type) { .index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX, .version = 1 }

#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)

#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial)                    \
    static unsigned int s_next = (initial);                               \
    for (uint32_t i = 0; i < (count); ++i) {                              \
        (out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
        (out)[i].version = 1;                                             \
    }

#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)

void RT_RENDERER_API_FN(RegisterCVars)(void) {
}

rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
    RT_UNUSED(info);
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(Shutdown)(void) {
}

unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
    return 2;
}

void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
    RT_UNUSED(frame_id);
}

void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
    RT_UNUSED(frame_id);
}

rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
    RT_UNUSED(info);
    RETURN_HANDLE_STUB(rt_pipeline_handle);
}

void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
    RT_UNUSED(handle);
}

rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
                                                  const rt_alloc_command_buffer_info *info,
                                                  rt_command_buffer_handle *p_command_buffers) {
    RT_UNUSED(info);
    RETURN_HANDLE_ARRAY_STUB(p_command_buffers, count)
    return RT_SUCCESS;
}

rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
                                                   const rt_submit_command_buffers_info *info) {
    RT_UNUSED(queue);
    RT_UNUSED(info);
    return RT_SUCCESS;
}

rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
                                            const rt_buffer_info *info,
                                            rt_buffer_handle *p_buffers) {
    RT_UNUSED(info);
    RETURN_HANDLE_ARRAY_STUB(p_buffers, count);
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
    RT_UNUSED(count);
    RT_UNUSED(buffers);
}

void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmd,
                                      const rt_cmd_begin_pass_info *info) {
    RT_UNUSED(cmd);
    RT_UNUSED(info);
}

void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmd) {
    RT_UNUSED(cmd);
}

void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
                                                   rt_render_target_handle target,
                                                   rt_render_target_state state) {
    RT_UNUSED(cmd);
    RT_UNUSED(target);
    RT_UNUSED(state);
}

void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
                                                   rt_render_target_handle render_target) {
    RT_UNUSED(cmdbuf_handle);
    RT_UNUSED(render_target);
}

static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *info) {
    RT_UNUSED(info);
    RETURN_HANDLE_STUB(rt_render_target_handle);
}

static int RequireExplicitSync(void) {
    return 0;
}

rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
    rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = CreateRenderTarget,
                                                      .RequireExplicitSynchronization =
                                                          RequireExplicitSync};
    return rtCreateRenderGraphBuilder(&cbs);
}

void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
    rtDestroyRenderGraphBuilder(builder);
}

rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
    RT_UNUSED(render_graph);
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                          uint32_t pass_id,
                                          rt_render_view view,
                                          unsigned int frame_id) {
    RT_UNUSED(render_graph);
    RT_UNUSED(pass_id);
    RT_UNUSED(view);
    RT_UNUSED(frame_id);
}

void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
    RT_UNUSED(graph);
}

void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
                                         rt_pipeline_handle pipeline_handle) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(pipeline_handle);
}

void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
                                              uint32_t first_binding,
                                              uint32_t count,
                                              const rt_buffer_handle *buffers,
                                              const uint64_t *_offsets) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first_binding);
    RT_UNUSED(count);
    RT_UNUSED(buffers);
    RT_UNUSED(_offsets);
}

void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle,
                                 uint32_t first,
                                 uint32_t count) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first);
    RT_UNUSED(count);
}
@ -1,219 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "transfers.h"
#include "resources.h"

#include "gfx/renderer_api.h"

#include "runtime/config.h"
#include "runtime/threading.h"

#include <stdlib.h>
#include <string.h>

RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);

typedef struct rt_buffer_data_s {
    rt_buffer data;
    uint32_t version;
    struct rt_buffer_data_s *next_free;
} rt_buffer_data;

static rt_buffer_data *_buffers;
static rt_buffer_data *_first_free;
static rt_mutex *_list_lock;

rt_result InitBufferManagement(void) {
    size_t n = (size_t)rt_VkMaxBufferCount.i;
    _buffers = calloc(n, sizeof(rt_buffer_data));
    if (!_buffers)
        return RT_OUT_OF_MEMORY;
    _first_free = &_buffers[1];
    for (size_t i = 1; i < n - 1; ++i)
        _buffers[i].next_free = &_buffers[i + 1];
    _list_lock = rtCreateMutex();
    return RT_SUCCESS;
}

void ShutdownBufferManagement(void) {
    for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
        if (_buffers[i].data.buffer == VK_NULL_HANDLE)
            continue;
        vmaDestroyBuffer(g_gpu.allocator, _buffers[i].data.buffer, _buffers[i].data.allocation);
        rtDestroyRWLock(&_buffers[i].data.lock);
        memset(&_buffers[i], 0, sizeof(_buffers[i]));
    }
    free(_buffers);
    _first_free = NULL;
    rtDestroyMutex(_list_lock);
}
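/* Editor's sketch (not part of the original file): the manipulations of
 * _first_free in the functions below follow the usual intrusive free-list
 * discipline. Factored out under the same _list_lock it would read:
 *
 *     static rt_buffer_data *AcquireSlot(void) {
 *         rtLockMutex(_list_lock);
 *         rt_buffer_data *slot = _first_free;   // pop the head, NULL if exhausted
 *         if (slot)
 *             _first_free = slot->next_free;
 *         rtUnlockMutex(_list_lock);
 *         return slot;
 *     }
 *
 *     static void ReleaseSlot(rt_buffer_data *slot) {
 *         rtLockMutex(_list_lock);
 *         slot->next_free = _first_free;        // push back onto the head
 *         _first_free = slot;
 *         rtUnlockMutex(_list_lock);
 *     }
 *
 * Slot 0 is deliberately never linked into the list, so a handle with
 * index 0 can always be treated as invalid. */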
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);

static void UploadViaMap(rt_buffer_data *buffer, const void *data, size_t size) {
    rtLockWrite(&buffer->data.lock);
    void *dev_mem = NULL;
    if (vmaMapMemory(g_gpu.allocator, buffer->data.allocation, &dev_mem) != VK_SUCCESS) {
        rtReportError("vk", "Unable to map buffer for upload");
        rtUnlockWrite(&buffer->data.lock);
        return;
    }
    memcpy(dev_mem, data, size);
    vmaUnmapMemory(g_gpu.allocator, buffer->data.allocation);
    if (!buffer->data.coherent)
        vmaFlushAllocation(g_gpu.allocator, buffer->data.allocation, 0, VK_WHOLE_SIZE);
    rtUnlockWrite(&buffer->data.lock);
}

/* Convenience function that decides between mapping or uploading via transfer buffer */
static void UploadData(rt_buffer_data *buffer, const void *data, size_t size) {
    if (buffer->data.mappable)
        UploadViaMap(buffer, data, size);
    else
        rtUploadToBuffer(buffer->data.buffer,
                         buffer->data.allocation,
                         buffer->data.owner,
                         data,
                         size);
}

rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
                                            const rt_buffer_info *info,
                                            rt_buffer_handle *p_buffers) {
    for (uint32_t i = 0; i < count; ++i) {
        rtLockMutex(_list_lock);
        rt_buffer_data *slot = _first_free;
        if (!slot) {
            rtUnlockMutex(_list_lock);
            if (i > 0)
                rtRenDestroyBuffers(i, p_buffers);
            return RT_OUT_OF_MEMORY;
        }
        _first_free = slot->next_free;
        rtUnlockMutex(_list_lock);

        VkBufferUsageFlags buffer_usage = 0;
        switch (info[i].type) {
        case RT_BUFFER_TYPE_VERTEX:
            buffer_usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
            break;
        case RT_BUFFER_TYPE_INDEX:
            buffer_usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
            break;
        case RT_BUFFER_TYPE_STORAGE:
            buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case RT_BUFFER_TYPE_UNIFORM:
            buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        }
        buffer_usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;

        VkBufferCreateInfo buffer_info = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .size = info[i].size,
            .usage = buffer_usage,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        };

        VmaMemoryUsage alloc_usage = VMA_MEMORY_USAGE_UNKNOWN;
        VmaAllocationCreateFlags alloc_flags = 0;
        switch (info[i].usage) {
        case RT_BUFFER_USAGE_STATIC:
            alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
            alloc_flags = 0;
            break;
        case RT_BUFFER_USAGE_DYNAMIC:
            alloc_usage = VMA_MEMORY_USAGE_AUTO;
            alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                          VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
            break;
        case RT_BUFFER_USAGE_TRANSIENT:
            alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
            alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
            break;
        }
        VmaAllocationCreateInfo alloc_info = {.usage = alloc_usage, .flags = alloc_flags};

        VkResult res = vmaCreateBuffer(g_gpu.allocator,
                                       &buffer_info,
                                       &alloc_info,
                                       &slot->data.buffer,
                                       &slot->data.allocation,
                                       NULL);
        if (res != VK_SUCCESS) {
            rtReportError("vk", "Failed to create a buffer: %u", res);
            rtLockMutex(_list_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            rtUnlockMutex(_list_lock);
            if (i > 0)
                rtRenDestroyBuffers(i, p_buffers);
            return RT_UNKNOWN_ERROR;
        }

        rt_create_rwlock_result lock_res = rtCreateRWLock();
        if (!lock_res.ok) {
            rtReportError("vk", "Failed to create lock for buffer.");
            vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
            rtLockMutex(_list_lock);
            slot->next_free = _first_free;
            _first_free = slot;
            rtUnlockMutex(_list_lock);
            if (i > 0)
                rtRenDestroyBuffers(i, p_buffers);
            return RT_UNKNOWN_ERROR;
        }
        slot->data.lock = lock_res.lock; /* keep the newly created lock with the slot */

        VkMemoryPropertyFlags properties;
        vmaGetAllocationMemoryProperties(g_gpu.allocator, slot->data.allocation, &properties);
        slot->data.mappable = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
        slot->data.coherent = (properties & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;

        slot->data.owner = RT_VK_UNOWNED;
        slot->data.state = RT_BUFFER_STATE_NOT_USED;

        if (info[i].data)
            UploadData(slot, info[i].data, info[i].size);

        ptrdiff_t index = slot - _buffers;
        p_buffers[i].index = (uint32_t)index;
        p_buffers[i].version = slot->version;
    }
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
    for (uint32_t i = 0; i < count; ++i) {
        if (buffers[i].index >= (uint32_t)rt_VkMaxBufferCount.i)
            continue;
        rt_buffer_data *slot = &_buffers[buffers[i].index];
        if (slot->version != buffers[i].version) {
            rtLog("vk", "Tried to destroy a buffer with an invalid handle (version mismatch).");
            continue;
        }
        rtLockWrite(&slot->data.lock);
        slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
        slot->data.buffer = VK_NULL_HANDLE;
        slot->data.allocation = VK_NULL_HANDLE;
        rtUnlockWrite(&slot->data.lock);
        rtDestroyRWLock(&slot->data.lock);

        rtLockMutex(_list_lock);
        slot->next_free = _first_free;
        _first_free = slot;
        rtUnlockMutex(_list_lock);
    }
}

rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
    if (handle.index >= (uint32_t)rt_VkMaxBufferCount.i)
        return NULL;
    rt_buffer_data *slot = &_buffers[handle.index];
    if (slot->version != handle.version) {
        rtLog("vk", "Tried to access a buffer with an invalid handle (version mismatch).");
        return NULL;
    }
    return &slot->data;
}
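A hedged usage sketch for the accessor above: rtGetBuffer validates both the index range and the handle generation, so a caller only needs a NULL check to reject stale handles. The helper below is illustrative only; rtLockRead/rtUnlockRead are assumed to be the read-side counterparts of the write-lock calls used in UploadViaMap.

    void ReadBufferExample(rt_buffer_handle handle) { /* hypothetical caller */
        rt_buffer *buf = rtGetBuffer(handle);
        if (!buf)
            return; /* stale or out-of-range handle */
        rtLockRead(&buf->lock); /* assumed read-side lock API */
        /* ... read buf->buffer / buf->allocation ... */
        rtUnlockRead(&buf->lock);
    }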
@ -1,490 +0,0 @@
#include "gpu.h"
#include "gpu_sync.h"
#include "swapchain.h"

#include "runtime/atomics.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"

#include "gfx/renderer_api.h"

#include <stdlib.h>

RT_CVAR_I(rt_VkMaxCommandPools,
          "Maximum number of command pools that can be created. Default: 32",
          32);
RT_CVAR_I(
    rt_VkCommandBufferRingBufferSize,
    "Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
    512);

typedef struct {
    VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
    uint32_t distinct_pool_count;

    VkCommandPool *compute_pools;
    VkCommandPool *graphics_pools;
    VkCommandPool *transfer_pools;
} rt_thread_pools;

typedef struct {
    VkCommandBuffer command_buffer;
    uint32_t version;
    rt_gpu_queue target_queue;
} rt_command_buffer;

static rt_thread_pools *_pools;
static uint32_t _next_pools;
static RT_THREAD_LOCAL unsigned int t_first_pool;

static rt_command_buffer *_command_buffers;
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
 * index. */
static uint32_t _next_command_buffer;

rt_result InitCommandBufferManagement(void) {
    _pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
    if (!_pools)
        return RT_OUT_OF_MEMORY;

    _command_buffers =
        calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
    if (!_command_buffers) {
        free(_pools);
        return RT_OUT_OF_MEMORY;
    }

    /* We keep 0 free as a "Not initialized" value for t_first_pool.
     * The atomicinc used to acquire a pool returns the incremented value, so 0 is never returned.
     */
    _next_pools = 0;
    return RT_SUCCESS;
}
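/* Editor's worked example (not part of the original file) of why the ring
 * size must be a power of two: because 2^32 is then a multiple of the size,
 * (start + i) % size stays consistent even when the 32-bit counter wraps.
 *
 *     uint32_t size  = 512;          // rt_VkCommandBufferRingBufferSize
 *     uint32_t start = 0xFFFFFFFEu;  // counter just before overflow
 *     // slots for i = 0..3: 510, 511, 0, 1 - no slot is skipped or reused
 *     uint32_t slot = (start + 2) % size; // wraps to 0, as if no overflow happened
 */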
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
}

void ShutdownCommandBufferManagement(void) {
    /* Pool set indices start at 1; _next_pools is the index of the last existing set */
    for (uint32_t i = 1; i <= _next_pools; ++i) {
        DestroyPools(&_pools[i]);
    }
    free(_pools);
    free(_command_buffers);
}

void rtResetCommandPools(unsigned int frame_id) {
    unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
    for (uint32_t i = 1; i <= _next_pools; ++i) {
        if (vkResetCommandPool(g_gpu.device,
                               _pools[i].graphics_pools[pool_idx],
                               VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
            rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
        }
        if (_pools[i].compute_pools != _pools[i].graphics_pools) {
            if (vkResetCommandPool(g_gpu.device,
                                   _pools[i].compute_pools[pool_idx],
                                   VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
                rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
            }
        }
        if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
            _pools[i].transfer_pools != _pools[i].compute_pools) {
            if (vkResetCommandPool(g_gpu.device,
                                   _pools[i].transfer_pools[pool_idx],
                                   VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
                rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
            }
        }
    }
}

static rt_result CreatePools(rt_thread_pools *pools) {
    /* Graphics pools */
    pools->graphics_pools = pools->pools;
    pools->distinct_pool_count = 0;
    VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
                                             .queueFamilyIndex = g_gpu.graphics_family,
                                             .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
    for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
        if (vkCreateCommandPool(g_gpu.device,
                                &graphics_info,
                                g_gpu.alloc_cb,
                                &pools->graphics_pools[i]) != VK_SUCCESS) {
            rtLog("vk", "Failed to create a graphics command pool.");
            DestroyPools(pools);
            return RT_UNKNOWN_ERROR;
        }
        ++pools->distinct_pool_count;
    }

    if (g_gpu.compute_family != g_gpu.graphics_family) {
        VkCommandPoolCreateInfo compute_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
            .queueFamilyIndex = g_gpu.compute_family,
            .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
        };
        pools->compute_pools = &pools->pools[pools->distinct_pool_count];
        for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
            if (vkCreateCommandPool(g_gpu.device,
                                    &compute_info,
                                    g_gpu.alloc_cb,
                                    &pools->compute_pools[i]) != VK_SUCCESS) {
                rtLog("vk", "Failed to create a compute command pool.");
                DestroyPools(pools);
                return RT_UNKNOWN_ERROR;
            }
            ++pools->distinct_pool_count;
        }
    } else {
        pools->compute_pools = pools->graphics_pools;
    }

    if (g_gpu.transfer_family != g_gpu.graphics_family &&
        g_gpu.transfer_family != g_gpu.compute_family) {
        VkCommandPoolCreateInfo transfer_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
            .queueFamilyIndex = g_gpu.transfer_family,
            .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
        };
        pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
        for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
            if (vkCreateCommandPool(g_gpu.device,
                                    &transfer_info,
                                    g_gpu.alloc_cb,
                                    &pools->transfer_pools[i]) != VK_SUCCESS) {
                rtLog("vk", "Failed to create a transfer command pool.");
                DestroyPools(pools);
                return RT_UNKNOWN_ERROR;
            }
            ++pools->distinct_pool_count;
        }
    } else if (g_gpu.transfer_family == g_gpu.graphics_family) {
        pools->transfer_pools = pools->graphics_pools;
    } else if (g_gpu.transfer_family == g_gpu.compute_family) {
        pools->transfer_pools = pools->compute_pools;
    }
    return RT_SUCCESS;
}
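/* Editor's sketch (not part of the original file): the two allocation entry
 * points below share the same lazy acquisition pattern. In isolation
 * (AcquireThreadPools is hypothetical) it reads:
 *
 *     static rt_thread_pools *AcquireThreadPools(void) {
 *         if (t_first_pool == 0) {                        // 0 = "not acquired yet"
 *             t_first_pool = rtAtomic32Inc(&_next_pools); // returns the new, nonzero value
 *             if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
 *                 return NULL;                            // pool table exhausted
 *             if (CreatePools(&_pools[t_first_pool]) != RT_SUCCESS)
 *                 return NULL;
 *         }
 *         return &_pools[t_first_pool];
 *     }
 *
 * Because t_first_pool is thread-local, each thread pays the creation cost
 * once and afterwards reaches its pools without any locking. */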
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
                                                  const rt_alloc_command_buffer_info *info,
                                                  rt_command_buffer_handle *p_command_buffers) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");

        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return create_res;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return RT_OUT_OF_MEMORY;

    uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    rt_result result = RT_SUCCESS;

    /* TODO: We should probably batch allocations of the same type */
    uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
    uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t slot = (start + i) % mod;
        _command_buffers[slot].version =
            (_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
        if (_command_buffers[slot].version == 0)
            _command_buffers[slot].version = 1;

        VkCommandPool pool = pools->graphics_pools[frame_id];
        if (info[i].target_queue == RT_COMPUTE_QUEUE)
            pool = pools->compute_pools[frame_id];
        else if (info[i].target_queue == RT_TRANSFER_QUEUE)
            pool = pools->transfer_pools[frame_id];
        _command_buffers[slot].target_queue = info[i].target_queue;

        VkCommandBufferAllocateInfo alloc_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            .commandBufferCount = 1,
            .commandPool = pool,
        };
        if (vkAllocateCommandBuffers(g_gpu.device,
                                     &alloc_info,
                                     &_command_buffers[slot].command_buffer) != VK_SUCCESS) {
            result = RT_UNKNOWN_ERROR;
            break;
        }

        VkCommandBufferBeginInfo begin_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        };
        vkBeginCommandBuffer(_command_buffers[slot].command_buffer, &begin_info);

        p_command_buffers[i].index = (slot + 1);
        p_command_buffers[i].version = _command_buffers[slot].version;
    }

    return result;
}

#define RT_VK_LOG_SUBMIT_INFO 1

rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
                                                   const rt_submit_command_buffers_info *info) {
    uint32_t count = info->command_buffer_count;
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    rt_result result = RT_SUCCESS;
    VkQueue target_queue = rtGetQueue(queue);

    VkCommandBufferSubmitInfo *command_buffers =
        RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count);
    if (!command_buffers) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }

    VkSemaphoreSubmitInfo *wait_semaphores =
        RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->wait_semaphore_count);
    if (!wait_semaphores && info->wait_semaphore_count > 0) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }
    VkSemaphoreSubmitInfo *signal_semaphores =
        RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count);
    if (!signal_semaphores && info->signal_semaphore_count > 0) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }
    uint32_t wait_count = info->wait_semaphore_count;
    uint32_t signal_count = info->signal_semaphore_count;
    for (uint32_t i = 0; i < wait_count; ++i) {
        VkSemaphoreSubmitInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = rtGetSemaphore(info->wait_semaphores[i]),
            .value = info->wait_values[i],
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
            .deviceIndex = 0,
        };
        wait_semaphores[i] = semaphore_info;
    }
    for (uint32_t i = 0; i < signal_count; ++i) {
        VkSemaphoreSubmitInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = rtGetSemaphore(info->signal_semaphores[i]),
            .value = info->signal_values[i],
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
            .deviceIndex = 0,
        };
        signal_semaphores[i] = semaphore_info;
    }

    for (uint32_t i = 0; i < count; ++i) {
        if (!RT_IS_HANDLE_VALID(info->command_buffers[i])) {
            rtLog("vk", "Tried to submit an invalid command buffer.");
            result = RT_INVALID_VALUE;
            goto out;
        }
        uint32_t slot = info->command_buffers[i].index - 1;
        if (_command_buffers[slot].version != info->command_buffers[i].version) {
            rtLog("vk",
                  "Mismatch between handle version and stored version while submitting a command "
                  "buffer");
            result = RT_INVALID_VALUE;
            goto out;
        }
        if (_command_buffers[slot].target_queue != queue) {
            rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
            result = RT_INVALID_VALUE;
            goto out;
        }
        command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
        command_buffers[i].pNext = NULL;
        command_buffers[i].deviceMask = 0;
        command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer;

        vkEndCommandBuffer(command_buffers[i].commandBuffer);
    }

#if RT_VK_LOG_SUBMIT_INFO
    {
        const char *queue_str = "<invalid>";
        if (queue == RT_GRAPHICS_QUEUE)
            queue_str = "GRAPHICS";
        else if (queue == RT_COMPUTE_QUEUE)
            queue_str = "COMPUTE";
        else if (queue == RT_TRANSFER_QUEUE)
            queue_str = "TRANSFER";
        rtLog("vk", "Submit Info");
        rtLog("vk", "Queue: %s", queue_str);
        rtLog("vk", "Command Buffers: %u", count);
        rtLog("vk", " - TODO: More Info");
        rtLog("vk", "Wait Semaphores:");
        for (uint32_t i = 0; i < wait_count; ++i) {
            rtLog("vk",
                  " - %u:%u Value %u",
                  info->wait_semaphores[i].version,
                  info->wait_semaphores[i].index,
                  info->wait_values[i]);
        }
        rtLog("vk", "Signal Semaphores:");
        for (uint32_t i = 0; i < signal_count; ++i) {
            rtLog("vk",
                  " - %u:%u Value %u",
                  info->signal_semaphores[i].version,
                  info->signal_semaphores[i].index,
                  info->signal_values[i]);
        }
    }
#endif

    VkSubmitInfo2 submit_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .waitSemaphoreInfoCount = wait_count,
        .signalSemaphoreInfoCount = signal_count,
        .pWaitSemaphoreInfos = wait_semaphores,
        .pSignalSemaphoreInfos = signal_semaphores,
        .commandBufferInfoCount = count,
        .pCommandBufferInfos = command_buffers,
    };

    if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
        rtLog("vk", "vkQueueSubmit2 failed.");
        result = RT_UNKNOWN_ERROR;
    }

out:
    rtReturnTemporaryArena(temp);
    return result;
}

VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf) {
    uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
    if (!RT_IS_HANDLE_VALID(cmdbuf))
        return VK_NULL_HANDLE;
    uint32_t slot = (cmdbuf.index - 1) % mod;
    if (_command_buffers[slot].version != cmdbuf.version) {
        return VK_NULL_HANDLE;
    }
    return _command_buffers[slot].command_buffer;
}

VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");

        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return VK_NULL_HANDLE;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return VK_NULL_HANDLE;

    uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    VkCommandPool pool = pools->graphics_pools[frame_id];
    if (queue == RT_COMPUTE_QUEUE)
        pool = pools->compute_pools[frame_id];
    else if (queue == RT_TRANSFER_QUEUE)
        pool = pools->transfer_pools[frame_id];

    VkCommandBufferAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
        .commandPool = pool,
    };
    VkCommandBuffer cmdbuf;
    if (vkAllocateCommandBuffers(g_gpu.device, &alloc_info, &cmdbuf) != VK_SUCCESS) {
        return VK_NULL_HANDLE;
    }
    return cmdbuf;
}

rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
                                      const VkSemaphore *wait_semaphores,
                                      const uint32_t *wait_values,
                                      uint32_t wait_semaphore_count,
                                      const VkSemaphore *signal_semaphores,
                                      const uint32_t *signal_values,
                                      uint32_t signal_semaphore_count,
                                      rt_gpu_queue queue,
                                      VkFence fence) {
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;
    VkQueue target_queue = rtGetQueue(queue);
    rt_result result = RT_SUCCESS;

    VkSemaphoreSubmitInfo *wait_semaphore_info =
        RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, wait_semaphore_count);
    if (!wait_semaphore_info && wait_semaphore_count > 0) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }
    VkSemaphoreSubmitInfo *signal_semaphore_info =
        RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, signal_semaphore_count);
    if (!signal_semaphore_info && signal_semaphore_count > 0) {
        result = RT_OUT_OF_MEMORY;
        goto out;
    }
    uint32_t wait_count = wait_semaphore_count;
    uint32_t signal_count = signal_semaphore_count;
    for (uint32_t i = 0; i < wait_count; ++i) {
        VkSemaphoreSubmitInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = wait_semaphores[i],
            .value = (wait_values) ? wait_values[i] : 0,
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
            .deviceIndex = 0,
        };
        wait_semaphore_info[i] = semaphore_info;
    }
    for (uint32_t i = 0; i < signal_count; ++i) {
        VkSemaphoreSubmitInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
            .semaphore = signal_semaphores[i],
            .value = (signal_values) ? signal_values[i] : 0,
            .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
            .deviceIndex = 0,
        };
        signal_semaphore_info[i] = semaphore_info;
    }

    VkCommandBufferSubmitInfo command_buffer_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .deviceMask = 0,
        .commandBuffer = command_buffer,
    };

    VkSubmitInfo2 submit_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .waitSemaphoreInfoCount = wait_count,
        .signalSemaphoreInfoCount = signal_count,
        .pWaitSemaphoreInfos = wait_semaphore_info,
        .pSignalSemaphoreInfos = signal_semaphore_info,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &command_buffer_info,
    };

    if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) {
        rtLog("vk", "vkQueueSubmit2 failed.");
        result = RT_UNKNOWN_ERROR;
    }
out:
    rtReturnTemporaryArena(temp);
    return result;
}
@ -1,25 +0,0 @@
#ifndef RT_COMMAND_BUFFERS_H
#define RT_COMMAND_BUFFERS_H

#include "gfx/renderer_api.h"
#include "runtime/runtime.h"

#include <volk/volk.h>

void rtResetCommandPools(unsigned int frame_id);

VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf);

VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue);

rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
                                      const VkSemaphore *wait_semaphores,
                                      const uint32_t *wait_values,
                                      uint32_t wait_semaphore_count,
                                      const VkSemaphore *signal_semaphores,
                                      const uint32_t *signal_values,
                                      uint32_t signal_semaphore_count,
                                      rt_gpu_queue queue,
                                      VkFence fence);

#endif
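A hedged usage sketch for the one-shot helpers declared above. Note the division of labor implied by their definitions: rtAllocSingleCommandBuffer only allocates, and rtSubmitSingleCommandBuffer only submits, so recording (including vkBeginCommandBuffer/vkEndCommandBuffer) is the caller's job.

    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
    if (cmd != VK_NULL_HANDLE) {
        VkCommandBufferBeginInfo begin_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        };
        vkBeginCommandBuffer(cmd, &begin_info);
        /* ... record e.g. a vkCmdCopyBuffer here ... */
        vkEndCommandBuffer(cmd);
        /* No semaphores, no fence: fire and forget on the transfer queue. */
        rtSubmitSingleCommandBuffer(cmd, NULL, NULL, 0, NULL, NULL, 0,
                                    RT_TRANSFER_QUEUE, VK_NULL_HANDLE);
    }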
@ -1,510 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"

#include "gfx/renderer_api.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"

#include <string.h>

#define USE_SIMPLE_SYNC_LIB 0

#if USE_SIMPLE_SYNC_LIB
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
#include <stdbool.h>
#endif

/* Retrieve the VkCommandBuffer as varname, or return */
#define GET_CMDBUF(varname, handle)                                                               \
    VkCommandBuffer varname = rtGetCommandBuffer((handle));                                       \
    if (varname == VK_NULL_HANDLE) {                                                              \
        rtLog("vk", "Failed to retrieve VkCommandBuffer for %s", __FUNCTION__);                   \
        return;                                                                                   \
    }

void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdbuf_handle,
                                      const rt_cmd_begin_pass_info *info) {
    GET_CMDBUF(cmdbuf, cmdbuf_handle)

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        rtReportError("vk", "Failed to acquire a temporary arena for CmdBeginPass");
        return;
    }

#ifdef RT_DEBUG
    VkDebugUtilsLabelEXT debug_label = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
        .color = {0.39f, 0.58f, 0.92f, 1.f},
        .pLabelName = (info->name) ? info->name : "Unnamed pass",
    };
    vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif

    /* Acquire the necessary attachments */
    VkRenderingAttachmentInfo *colorbuffers =
        RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, VkRenderingAttachmentInfo, info->color_buffer_count);
    for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
        VkImageView image_view = VK_NULL_HANDLE;
        if (RT_IS_HANDLE_VALID(info->color_buffers[i])) {
            rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
            if (rt)
                image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];
        }

        colorbuffers[i].sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
        colorbuffers[i].pNext = NULL;
        colorbuffers[i].imageView = image_view;
        colorbuffers[i].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        switch (info->color_buffer_loads[i]) {
        case RT_PASS_LOAD_MODE_CLEAR:
            colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
            break;
        case RT_PASS_LOAD_MODE_LOAD:
            colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
            break;
        default:
            colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
            break;
        }
        switch (info->color_buffer_writes[i]) {
        case RT_PASS_WRITE_MODE_STORE:
            colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
            break;
        case RT_PASS_WRITE_MODE_DISCARD:
            colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
            break;
        default:
            colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_NONE;
            break;
        }
        memcpy(&colorbuffers[i].clearValue.color.float32,
               info->color_buffer_clear_values[i].color.v,
               sizeof(float) * 4);

        /* TODO: Multisample resolve */
        colorbuffers[i].resolveMode = VK_RESOLVE_MODE_NONE;
        colorbuffers[i].resolveImageView = VK_NULL_HANDLE;
        colorbuffers[i].resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    }

    /* depth and stencil might be the same */
    VkRenderingAttachmentInfo *depth_stencil_buffer =
        RT_IS_HANDLE_VALID(info->depth_stencil_buffer)
            ? RT_ARENA_PUSH_STRUCT_ZERO(temp.arena, VkRenderingAttachmentInfo)
            : NULL;
    if (depth_stencil_buffer) {
        VkImageView image_view = VK_NULL_HANDLE;
        rt_render_target *rt = rtGetRenderTarget(info->depth_stencil_buffer);
        if (rt)
            image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];

        depth_stencil_buffer->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
        depth_stencil_buffer->pNext = NULL;
        depth_stencil_buffer->imageView = image_view;
        depth_stencil_buffer->imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        switch (info->depth_stencil_buffer_load) {
        case RT_PASS_LOAD_MODE_CLEAR:
            depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
            break;
        case RT_PASS_LOAD_MODE_LOAD:
            depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
            break;
        default:
            depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
            break;
        }
        switch (info->depth_stencil_buffer_write) {
        case RT_PASS_WRITE_MODE_STORE:
            depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
            break;
        case RT_PASS_WRITE_MODE_DISCARD:
            depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
            break;
        default:
            depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_NONE;
            break;
        }

        /* TODO: Multisample resolve */
        depth_stencil_buffer->resolveMode = VK_RESOLVE_MODE_NONE;
        depth_stencil_buffer->resolveImageView = VK_NULL_HANDLE;
        depth_stencil_buffer->resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    }

    VkRect2D render_area = {
        .offset = {.x = info->render_area.offset.x, .y = info->render_area.offset.y},
        .extent = {.width = info->render_area.size.x, .height = info->render_area.size.y},
    };
    if (render_area.extent.width == 0)
        render_area.extent.width = g_swapchain.extent.width;
    if (render_area.extent.height == 0)
        render_area.extent.height = g_swapchain.extent.height;

    VkRenderingInfo rendering_info = {
        .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
        .pColorAttachments = colorbuffers,
        .colorAttachmentCount = info->color_buffer_count,
        .pDepthAttachment = depth_stencil_buffer,
        .pStencilAttachment = depth_stencil_buffer,
        .layerCount = 1,
        .renderArea = render_area,
    };

    vkCmdBeginRendering(cmdbuf, &rendering_info);

    rtReturnTemporaryArena(temp);
}

void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdbuf_handle) {
    GET_CMDBUF(cmdbuf, cmdbuf_handle)
    vkCmdEndRendering(cmdbuf);

#ifdef RT_DEBUG
    vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
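/* Editor's note (not part of the original file): the barrier helpers below
 * repeat the same depth-format test in several places. An equivalent helper
 * would capture the intent in one spot:
 *
 *     static inline int IsDepthFormat(VkFormat format) {
 *         return format == VK_FORMAT_D24_UNORM_S8_UINT ||
 *                format == VK_FORMAT_D32_SFLOAT;
 *     }
 */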
/* Non-layout transition barrier */
static void ExecuteRenderTargetBarrier(rt_render_target *rt,
                                       uint32_t image_index,
                                       VkCommandBuffer cmdbuf) {
    /* Determine the current layout (this barrier does not change it) */
    VkImageLayout layout;
    switch (rt->states[image_index]) {
    case RT_RENDER_TARGET_STATE_ATTACHMENT:
        layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
        break;
    case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
    case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
        layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        break;
    default:
        layout = VK_IMAGE_LAYOUT_UNDEFINED;
        break;
    }

#ifdef RT_DEBUG
    VkDebugUtilsLabelEXT debug_label = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
        .pLabelName = "Render Target Barrier",
        .color = {.13f, .54f, .13f, .75f},
    };
    vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif

    VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
    if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    else if (rt->format == VK_FORMAT_D32_SFLOAT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; /* D32_SFLOAT has no stencil aspect */

    /* Determine access flags */
    VkPipelineStageFlags2 src_stage = 0;
    VkPipelineStageFlags2 dst_stage = 0;
    VkAccessFlags2 src_access = 0;
    VkAccessFlags2 dst_access = 0;
    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        src_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
        dst_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                      VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
                      VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
        src_stage =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
        dst_stage =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
        src_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
        dst_access = VK_ACCESS_2_SHADER_READ_BIT;
        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
        dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    }

    VkImageMemoryBarrier2 image_barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = src_stage,
        .srcAccessMask = src_access,
        .dstStageMask = dst_stage,
        .dstAccessMask = dst_access,
        .oldLayout = layout,
        .newLayout = layout,
        .image = rt->image[image_index],
        /* clang-format off */
        .subresourceRange = {
            .aspectMask = aspect_mask,
            .baseArrayLayer = 0,
            .baseMipLevel = 0,
            .layerCount = 1,
            .levelCount = 1,
        },
        /* clang-format on */
    };

    VkDependencyInfo dep_info = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = &image_barrier,
        .imageMemoryBarrierCount = 1,
    };
    vkCmdPipelineBarrier2(cmdbuf, &dep_info);

#ifdef RT_DEBUG
    vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}

static void DoLayoutTransition(rt_render_target *rt,
                               uint32_t image_index,
                               rt_render_target_state new_state,
                               VkCommandBuffer cmdbuf) {
#if !USE_SIMPLE_SYNC_LIB
    /* Determine old and new layout */
    VkImageLayout old_layout;
    switch (rt->states[image_index]) {
    case RT_RENDER_TARGET_STATE_ATTACHMENT:
        old_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
        break;
    case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
    case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
        old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        break;
    default:
        old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
        break;
    }
    VkImageLayout new_layout;
    switch (new_state) {
    case RT_RENDER_TARGET_STATE_ATTACHMENT:
        new_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
        break;
    case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
    case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
        new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        break;
    default:
        new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
        break;
    }

#ifdef RT_DEBUG
    VkDebugUtilsLabelEXT debug_label = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
        .pLabelName = "Transition Render Target",
        .color = {.13f, .54f, .13f, .75f},
    };
    vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif

    VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
    if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    else if (rt->format == VK_FORMAT_D32_SFLOAT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; /* D32_SFLOAT has no stencil aspect */

    VkPipelineStageFlags2 src_stage = 0;
    VkPipelineStageFlags2 dst_stage = 0;
    /* Determine access flags */
    VkAccessFlags2 src_access = 0;
    VkAccessFlags2 dst_access = 0;
    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        src_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
        src_stage =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
        src_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
        src_stage =
            VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; // VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
                                                    // VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
    }

    if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        dst_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                      VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
        dst_stage =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
        dst_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
        dst_stage = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
    }

    VkImageMemoryBarrier2 image_barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = src_stage,
        .srcAccessMask = src_access,
        .dstStageMask = dst_stage,
        .dstAccessMask = dst_access,
        .oldLayout = old_layout,
        .newLayout = new_layout,
        .image = rt->image[image_index],
        /* clang-format off */
        .subresourceRange = {
            .aspectMask = aspect_mask,
            .baseArrayLayer = 0,
            .baseMipLevel = 0,
            .layerCount = 1,
            .levelCount = 1,
        },
        /* clang-format on */
    };

    VkDependencyInfo dep_info = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = &image_barrier,
        .imageMemoryBarrierCount = 1,
    };
    vkCmdPipelineBarrier2(cmdbuf, &dep_info);

#ifdef RT_DEBUG
    vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
#else
    ThsvsAccessType prev_access = THSVS_ACCESS_NONE;
    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
            prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
        else
            prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) {
        prev_access = THSVS_ACCESS_NONE;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
        prev_access = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        prev_access = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
    }

    ThsvsAccessType next_accesses[2];
    uint32_t next_access_count = 0;
    if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
            next_accesses[0] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ;
            next_accesses[1] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
        } else {
            next_accesses[0] = THSVS_ACCESS_COLOR_ATTACHMENT_READ;
            next_accesses[1] = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
        }
        next_access_count = 2;
    } else if (new_state == RT_RENDER_TARGET_STATE_INVALID) {
        next_accesses[0] = THSVS_ACCESS_NONE;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
        next_accesses[1] = THSVS_ACCESS_ANY_SHADER_WRITE;
        next_access_count = 2;
    }
    VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
    if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    else if (rt->format == VK_FORMAT_D32_SFLOAT)
        aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; /* D32_SFLOAT has no stencil aspect */
    ThsvsImageBarrier barrier = {0};
    barrier.image = rt->image[image_index];
    barrier.pPrevAccesses = &prev_access;
    barrier.prevAccessCount = 1;
    barrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
    barrier.nextAccessCount = next_access_count;
    barrier.pNextAccesses = next_accesses;
    barrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
    barrier.discardContents = false;
    barrier.subresourceRange.aspectMask = aspect_mask;
    barrier.subresourceRange.baseArrayLayer = 0;
    barrier.subresourceRange.layerCount = 1;
    barrier.subresourceRange.baseMipLevel = 0;
    barrier.subresourceRange.levelCount = 1;
    thsvsCmdPipelineBarrier(cmdbuf, NULL, 0, NULL, 1, &barrier);

#endif
    rt->states[image_index] = new_state;
}

void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
                                                   rt_render_target_handle render_target,
                                                   rt_render_target_state new_state) {
    GET_CMDBUF(cmdbuf, cmdbuf_handle)
    uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    if (render_target.index == rtGetSwapchainRenderTarget().index) {
        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
    }

    rt_render_target *rt = rtGetRenderTarget(render_target);
    if (!rt) {
        rtLog("vk", "Tried to transition invalid render target");
        return;
    }

    if (rt->states[image_index] != new_state)
        DoLayoutTransition(rt, image_index, new_state, cmdbuf);
    else
        ExecuteRenderTargetBarrier(rt, image_index, cmdbuf);
}

void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
                                                   rt_render_target_handle render_target) {
    GET_CMDBUF(cmdbuf, cmdbuf_handle)
    uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    if (render_target.index == rtGetSwapchainRenderTarget().index) {
        image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
    }
    rt_render_target *rt = rtGetRenderTarget(render_target);
    if (!rt) {
        rtLog("vk", "Tried to flush invalid render target");
        return;
    }

    VkAccessFlags2 src_access;
    VkPipelineStageFlags2 src_stage;
    if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
        src_access =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
                : VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
        src_stage =
            (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
                ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
                : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        src_access = VK_ACCESS_2_MEMORY_WRITE_BIT;
        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    } else {
        /* Sampled images and invalid targets have no pending writes to flush */
        return;
    }

    VkMemoryBarrier2 barrier = {.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
                                .srcAccessMask = src_access,
                                .srcStageMask = src_stage,
                                .dstAccessMask = 0,
                                .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT};
    VkDependencyInfo dep = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .memoryBarrierCount = 1,
        .pMemoryBarriers = &barrier,
    };
    vkCmdPipelineBarrier2(cmdbuf, &dep);
}
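A hedged usage sketch of the transition API above: a render graph that draws into a target and later samples it brackets the usage with state transitions, and CmdTransitionRenderTarget internally picks between a full layout transition and an execution-only barrier. Names below are illustrative; the rtRen prefix for the dispatched function is assumed from the convention seen in rtRenDestroyBuffers and rtRenBeginFrame.

    /* scene_color is a hypothetical color render target handle */
    rtRenCmdTransitionRenderTarget(cmd, scene_color, RT_RENDER_TARGET_STATE_ATTACHMENT);
    /* ... geometry pass renders into scene_color ... */
    rtRenCmdTransitionRenderTarget(cmd, scene_color, RT_RENDER_TARGET_STATE_SAMPLED_IMAGE);
    /* ... post-process pass reads scene_color as a sampled image ... */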
@ -1,139 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "transfers.h"

#include "gfx/renderer_api.h"

#define ONE_SECOND_NS 1000000000u

void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
    g_gpu.current_frame_id = frame_id;

    rt_frame_data *frame = rtGetFrameData(frame_id);

    /* Wait until the previous frame is done */
    VkFence fence = g_swapchain.image_fences[frame_id % g_swapchain.image_count];
    RT_VK_CHECK(vkWaitForFences(g_gpu.device, 1, &fence, VK_TRUE, ONE_SECOND_NS));
    RT_VK_CHECK(vkResetFences(g_gpu.device, 1, &fence));

    rtResetCommandPools(frame_id);

    VkResult acquire_res = vkAcquireNextImageKHR(g_gpu.device,
                                                 g_swapchain.swapchain,
                                                 ONE_SECOND_NS,
                                                 frame->image_available,
                                                 fence,
                                                 &frame->swapchain_image_index);
    if (acquire_res == VK_SUBOPTIMAL_KHR || acquire_res == VK_ERROR_OUT_OF_DATE_KHR) {
        /* We need to recreate the swapchain and try again */
        rtLog("vk", "Swapchain has become suboptimal and needs to be re-created.");
        vkDeviceWaitIdle(g_gpu.device);
        if (rtRecreateSwapchain() != RT_SUCCESS) {
            rtReportError("vk", "Failed to recreate the swapchain.");
            return;
        }
        rtUpdateSwapchainRenderTarget();
        rtUpdateRenderTargetsFromSwapchain(g_swapchain.image_count,
                                           g_swapchain.format,
                                           g_swapchain.extent);
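        /* Presumably this dispatches back into BeginFrame through the
         * renderer API so an image is acquired from the rebuilt swapchain
         * (one level of re-entry). */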
        rtRenBeginFrame(frame_id);
    } else if (acquire_res != VK_SUCCESS) {
        rtReportError("vk", "vkAcquireNextImageKHR failed: %u", acquire_res);
    }

    /* Update the swapchain render target */
    rt_render_target_handle swap_rt_handle = rtGetSwapchainRenderTarget();
    rt_render_target *swap_rt = rtGetRenderTarget(swap_rt_handle);
    swap_rt->states[frame->swapchain_image_index] = RT_RENDER_TARGET_STATE_INVALID;
}

void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
    rt_frame_data *frame = rtGetFrameData(frame_id);

    uint32_t image_index = frame->swapchain_image_index;

    /* Transition the swap chain image to the correct layout */
    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_GRAPHICS_QUEUE);
    if (cmd == VK_NULL_HANDLE) {
        rtReportError("vk",
                      "Failed to allocate a command buffer for transitioning the swapchain image "
                      "to PRESENT_SRC layout.");
        return;
    }

    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    vkBeginCommandBuffer(cmd, &begin_info);

#ifdef RT_DEBUG
    VkDebugUtilsLabelEXT debug_label = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
        .color = {.13f, .54f, .13f, 1.f},
        .pLabelName = "Transition Swapchain"
    };
    vkCmdBeginDebugUtilsLabelEXT(cmd, &debug_label);
#endif

    VkImageMemoryBarrier2 image_barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
        .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
        .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
        .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT | VK_ACCESS_2_MEMORY_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
        .image = g_swapchain.images[image_index],
        /* clang-format off */
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseArrayLayer = 0,
            .baseMipLevel = 0,
            .layerCount = 1,
            .levelCount = 1,
        },
        /* clang-format on */
    };

    VkDependencyInfo dep_info = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = &image_barrier,
        .imageMemoryBarrierCount = 1,
    };
    vkCmdPipelineBarrier2(cmd, &dep_info);

#ifdef RT_DEBUG
    vkCmdEndDebugUtilsLabelEXT(cmd);
#endif

    vkEndCommandBuffer(cmd);
    if (rtSubmitSingleCommandBuffer(cmd,
                                    &frame->render_finished,
                                    NULL,
                                    1,
                                    &frame->swapchain_transitioned,
                                    NULL,
                                    1,
                                    RT_GRAPHICS_QUEUE,
                                    VK_NULL_HANDLE) != RT_SUCCESS) {
        rtReportError("vk", "Failed to submit the layout transition for the swapchain image.");
        return;
    }
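
    /* Judging by the argument order of rtSubmitSingleCommandBuffer, the
     * submit above waits on render_finished and signals
     * swapchain_transitioned, chaining rendering -> layout transition ->
     * present below. */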
    VkPresentInfoKHR present_info = {
        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
        .pImageIndices = &image_index,
        .pSwapchains = &g_swapchain.swapchain,
        .swapchainCount = 1,
        .pWaitSemaphores = &frame->swapchain_transitioned,
        .waitSemaphoreCount = 1,
    };

    VkResult res = vkQueuePresentKHR(g_gpu.present_queue, &present_info);
    if (res != VK_SUCCESS) {
        rtReportError("vk", "vkQueuePresentKHR failed: %u", res);
    }

    rtFlushGPUTransfers();
}
@ -1,22 +0,0 @@
#ifndef RT_VK_FRAMEBUFFER_H
#define RT_VK_FRAMEBUFFER_H

#include <volk/volk.h>

typedef struct {
    VkFramebuffer framebuffer;
    uint32_t pass_idx;
} rt_framebuffer;

typedef struct {
    uint32_t index;
} rt_framebuffer_handle;

/* Reserve a slot, but don't actually create the framebuffer yet.
 * We can use this if we are unsure if the framebuffer will really be needed.
 */
rt_framebuffer_handle rt_reserve_framebuffer(void);

rt_framebuffer *rt_get_framebuffer(rt_framebuffer_handle handle);
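
/* Intended usage, as a sketch (names from this header; the flow is assumed):
 *     rt_framebuffer_handle h = rt_reserve_framebuffer();
 *     ...later, once the framebuffer is known to be needed...
 *     rt_framebuffer *fb = rt_get_framebuffer(h);
 */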

#endif
@ -1,107 +0,0 @@
#ifndef RT_VK_GPU_H
#define RT_VK_GPU_H

#include <volk/volk.h>

#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vma/vk_mem_alloc.h>

#include "gfx/renderer_api.h"

/* Used to mark a resource as not owned by a particular queue */
#define RT_VK_UNOWNED 255

/* Minimum supported value of g_gpu.max_frames_in_flight */
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2

/* Maximum supported number of frames in flight.
 * The actually configured value is contained in g_gpu. */
#define RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT 3

#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif

typedef struct {
#ifdef _WIN32
    struct HINSTANCE__ *hInstance;
    struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
    struct _XDisplay *display;
    unsigned long window;
#endif
} rt_native_window;

typedef struct {
    uint32_t swapchain_image_index;
    VkSemaphore image_available;
    VkSemaphore render_finished;
    VkSemaphore swapchain_transitioned;
} rt_frame_data;

typedef struct {
    VkInstance instance;
    VkDebugUtilsMessengerEXT messenger;
    VkAllocationCallbacks *alloc_cb;
    VkPhysicalDevice phys_device;
    VkDevice device;
    VkSurfaceKHR surface;
    VkQueue graphics_queue;
    VkQueue compute_queue;
    VkQueue present_queue;
    VkQueue transfer_queue;
    uint32_t graphics_family;
    uint32_t compute_family;
    uint32_t present_family;
    uint32_t transfer_family;

    rt_native_window native_window;

    VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props;
    VkPhysicalDeviceProperties phys_device_props;
    VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features;
    VkPhysicalDeviceFeatures phys_device_features;

    VmaAllocator allocator;

    unsigned int max_frames_in_flight;
    unsigned int current_frame_id;

    rt_frame_data frames[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
} rt_vk_gpu;

#ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL
extern rt_vk_gpu g_gpu;

RT_INLINE rt_frame_data *rtGetFrameData(unsigned int frame_id) {
    return &g_gpu.frames[frame_id % g_gpu.max_frames_in_flight];
}

#endif

/* Helper functions */

#define RT_VK_CHECK(expr)                                                                         \
    do {                                                                                          \
        VkResult res = expr;                                                                      \
        if (res != VK_SUCCESS) {                                                                  \
            rtReportError("vk", "Vulkan command failed with error %u.\nCommand: %s", res, #expr); \
        }                                                                                         \
    } while (0)
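
/* Note: RT_VK_CHECK only reports a failure; it neither returns nor aborts,
 * so the caller continues with whatever state the failed call left behind. */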

VkFormat rtPixelFormatToVkFormat(rt_pixel_format format);

VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);

VkQueue rtGetQueue(rt_gpu_queue queue);

uint32_t rtGetQueueFamily(rt_gpu_queue queue);

const char *rtVkFormatToString(VkFormat format);

#endif
@ -1,192 +0,0 @@
#include "gpu.h"

#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading.h"

#include "gfx/renderer_api.h"

#include <stdlib.h>

RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 1024);

#define SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX 0xffffff
#define RENDER_FINISHED_SEMAPHORE_INDEX 0xfffffe
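
/* These two reserved indices never point into the _semaphores array below;
 * they alias the per-frame binary semaphores owned by g_gpu
 * (see rtGetSemaphore). */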

typedef struct rt_gpu_semaphore_s {
    uint32_t version;
    VkSemaphore semaphore[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
    uint64_t current_value[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
    /* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a
     * not-signaled semaphore. */

    struct rt_gpu_semaphore_s *next_free;
} rt_gpu_semaphore;
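
/* Slots are recycled through the intrusive free list; the version counter is
 * bumped on every reallocation and baked into the handle, so stale handles to
 * a recycled slot fail the version check instead of silently aliasing it. */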

static rt_gpu_semaphore *_semaphores;
static rt_gpu_semaphore *_first_free;
static rt_mutex *_lock;

static void DestroySemaphore(rt_gpu_semaphore *s) {
    for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
        vkDestroySemaphore(g_gpu.device, s->semaphore[i], g_gpu.alloc_cb);
        s->semaphore[i] = VK_NULL_HANDLE;
    }
    rtLockMutex(_lock);
    s->next_free = _first_free;
    _first_free = s;
    rtUnlockMutex(_lock);
}

rt_result InitializeSempahoreManagement(void) {
    _semaphores = calloc(rt_VkMaxSemaphores.i, sizeof(rt_gpu_semaphore));
    if (!_semaphores)
        return RT_OUT_OF_MEMORY;

    _lock = rtCreateMutex();
    if (!_lock) {
        free(_semaphores);
        return RT_UNKNOWN_ERROR;
    }

    /* Keep 0 unused for the invalid handle */
    _first_free = &_semaphores[1];
    for (int i = 1; i < rt_VkMaxSemaphores.i - 1; ++i)
        _semaphores[i].next_free = &_semaphores[i + 1];
    _semaphores[rt_VkMaxSemaphores.i - 1].next_free = NULL;

    return RT_SUCCESS;
}

void ShutdownSemaphoreManagement(void) {
    for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) {
        for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j)
            vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore[j], g_gpu.alloc_cb);
    }
}

rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
                                               const rt_gpu_semaphore_info *info,
                                               rt_gpu_semaphore_handle *p_semaphores) {
    for (uint32_t i = 0; i < count; ++i) {
        rtLockMutex(_lock);
        rt_gpu_semaphore *sem = _first_free;
        if (sem)
            _first_free = sem->next_free;
        rtUnlockMutex(_lock);

        if (!sem) {
            for (uint32_t j = 0; j < i; ++j) {
                uint32_t index = p_semaphores[j].index;
                DestroySemaphore(&_semaphores[index]);
            }
            return RT_OUT_OF_MEMORY;
        }

        sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;

        for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) {
            VkSemaphoreTypeCreateInfo type_info = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
                .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
                .initialValue = info[i].initial_value,
            };
            VkSemaphoreCreateInfo semaphore_info = {
                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
                .pNext = &type_info,
            };

            if (vkCreateSemaphore(g_gpu.device,
                                  &semaphore_info,
                                  g_gpu.alloc_cb,
                                  &sem->semaphore[j]) != VK_SUCCESS) {
                for (uint32_t k = 0; k < i; ++k) {
                    uint32_t index = p_semaphores[k].index;
                    DestroySemaphore(&_semaphores[index]);
                }
                return RT_UNKNOWN_ERROR;
            }

#ifdef RT_DEBUG
            char name[128];
            rtSPrint(name, 128, "%s (%u)", (info[i].name) ? info[i].name : "Unnamed Semaphore", j);
            VkDebugUtilsObjectNameInfoEXT name_info = {
                .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
                .objectHandle = (uint64_t)sem->semaphore[j],
                .objectType = VK_OBJECT_TYPE_SEMAPHORE,
                .pObjectName = name,
            };
            vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif

            sem->current_value[j] = 0;
        }

        p_semaphores[i].version = (unsigned char)sem->version;
        p_semaphores[i].index = (uint32_t)(sem - _semaphores);
    }
    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t index = semaphores[i].index;
        if (index >= (uint32_t)rt_VkMaxSemaphores.i)
            continue;
        if (semaphores[i].version != _semaphores[index].version) {
            rtLog("vk",
                  "Tried to destroy semaphore %u with version %u, but the semaphore has version %u",
                  index,
                  semaphores[i].version,
                  _semaphores[index].version);
            continue;
        }
        DestroySemaphore(&_semaphores[index]);
    }
}

VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) {
    uint32_t index = handle.index;

    if (index == SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX) {
        rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
        return fd->image_available;
    } else if (index == RENDER_FINISHED_SEMAPHORE_INDEX) {
        rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
        return fd->render_finished;
    }

    if (!RT_IS_HANDLE_VALID(handle) || index >= (uint32_t)rt_VkMaxSemaphores.i)
        return VK_NULL_HANDLE;
    if (_semaphores[index].version != handle.version)
        return VK_NULL_HANDLE;
    uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    return _semaphores[index].semaphore[frame];
}

uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) {
    uint32_t index = semaphore.index;
    if (!RT_IS_HANDLE_VALID(semaphore) || index >= (uint32_t)rt_VkMaxSemaphores.i)
        return 0;
    if (_semaphores[index].version != semaphore.version)
        return 0;
    uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
    vkGetSemaphoreCounterValue(g_gpu.device,
                               _semaphores[index].semaphore[frame],
                               &_semaphores[index].current_value[frame]);
    return _semaphores[index].current_value[frame];
}

rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
    return (rt_gpu_semaphore_handle){
        .version = 1,
        .index = SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX,
    };
}

rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
    return (rt_gpu_semaphore_handle){
        .version = 1,
        .index = RENDER_FINISHED_SEMAPHORE_INDEX,
    };
}
@ -1,10 +0,0 @@
#ifndef RT_VK_GPU_SYNC_H
#define RT_VK_GPU_SYNC_H

#include <volk/volk.h>

#include "gfx/renderer_api.h"

VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle);

#endif
@ -1,97 +0,0 @@
#include "gpu.h"

VkFormat rtPixelFormatToVkFormat(rt_pixel_format format) {
    switch (format) {
    case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
        return VK_FORMAT_R8G8B8A8_UNORM;
    case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
        return VK_FORMAT_B8G8R8A8_UNORM;
    case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
        return VK_FORMAT_R8G8B8A8_SRGB;
    case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
        return VK_FORMAT_B8G8R8A8_SRGB;
    case RT_PIXEL_FORMAT_R8G8B8_UNORM:
        return VK_FORMAT_R8G8B8_UNORM;
    case RT_PIXEL_FORMAT_B8G8R8_UNORM:
        return VK_FORMAT_B8G8R8_UNORM;
    case RT_PIXEL_FORMAT_R8G8B8_SRGB:
        return VK_FORMAT_R8G8B8_SRGB;
    case RT_PIXEL_FORMAT_B8G8R8_SRGB:
        return VK_FORMAT_B8G8R8_SRGB;
    case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
        return VK_FORMAT_D24_UNORM_S8_UINT;
    case RT_PIXEL_FORMAT_DEPTH32:
        return VK_FORMAT_D32_SFLOAT;
    default:
        return VK_FORMAT_UNDEFINED;
    }
}

VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count) {
    /* Limit to what the gpu supports */
    VkSampleCountFlags counts = g_gpu.phys_device_props.limits.framebufferColorSampleCounts &
                                g_gpu.phys_device_props.limits.framebufferDepthSampleCounts &
                                g_gpu.phys_device_props.limits.sampledImageColorSampleCounts &
                                g_gpu.phys_device_props.limits.sampledImageDepthSampleCounts;
    while (count > 1) {
        if ((counts & count) == 0)
            count >>= 1;
        else
            break;
    }
    return (VkSampleCountFlagBits)count;
}
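
/* Example: a request for count = 8 on a device whose combined limits only
 * support up to 4 is halved (8 -> 4) until it lands on a supported bit.
 * This assumes count is a power of two, matching VkSampleCountFlagBits. */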

VkQueue rtGetQueue(rt_gpu_queue queue) {
    switch (queue) {
    case RT_GRAPHICS_QUEUE:
        return g_gpu.graphics_queue;
    case RT_COMPUTE_QUEUE:
        return g_gpu.compute_queue;
    case RT_TRANSFER_QUEUE:
        return g_gpu.transfer_queue;
    default:
        return VK_NULL_HANDLE;
    }
}

uint32_t rtGetQueueFamily(rt_gpu_queue queue) {
    switch (queue) {
    case RT_GRAPHICS_QUEUE:
        return g_gpu.graphics_family;
    case RT_COMPUTE_QUEUE:
        return g_gpu.compute_family;
    case RT_TRANSFER_QUEUE:
        return g_gpu.transfer_family;
    default:
        return UINT32_MAX;
    }
}

const char *rtVkFormatToString(VkFormat format) {
    switch (format) {
    case VK_FORMAT_R8G8B8A8_UNORM:
        return "R8G8B8A8_UNORM";
    case VK_FORMAT_B8G8R8A8_UNORM:
        return "B8G8R8A8_UNORM";
    case VK_FORMAT_R8G8B8A8_SRGB:
        return "R8G8B8A8_SRGB";
    case VK_FORMAT_B8G8R8A8_SRGB:
        return "B8G8R8A8_SRGB";
    case VK_FORMAT_R8G8B8_UNORM:
        return "R8G8B8_UNORM";
    case VK_FORMAT_B8G8R8_UNORM:
        return "B8G8R8_UNORM";
    case VK_FORMAT_R8G8B8_SRGB:
        return "R8G8B8_SRGB";
    case VK_FORMAT_B8G8R8_SRGB:
        return "B8G8R8_SRGB";
    case VK_FORMAT_D24_UNORM_S8_UINT:
        return "D24_UNORM_S8_UINT";
    case VK_FORMAT_D32_SFLOAT:
        return "D32_SFLOAT";
    default:
        return "UNDEFINED";
    }
}
@ -1,737 +0,0 @@
#include <malloc.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#define RT_VK_DONT_DEFINE_GPU_GLOBAL
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"

#include "runtime/config.h"
#include "runtime/runtime.h"

#include "gfx/renderer_api.h"

#define TARGET_API_VERSION VK_API_VERSION_1_3

RT_CVAR_I(r_VkEnableAPIAllocTracking,
          "Enable tracking of allocations done by the vulkan api. [0/1] Default: 0",
          0);

RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");

RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);

rt_vk_gpu g_gpu;

static VkAllocationCallbacks _tracking_alloc_cbs;

static const char *AllocationScopeToString(VkSystemAllocationScope scope) {
    switch (scope) {
    case VK_SYSTEM_ALLOCATION_SCOPE_COMMAND:
        return "COMMAND";
    case VK_SYSTEM_ALLOCATION_SCOPE_OBJECT:
        return "OBJECT";
    case VK_SYSTEM_ALLOCATION_SCOPE_CACHE:
        return "CACHE";
    case VK_SYSTEM_ALLOCATION_SCOPE_DEVICE:
        return "DEVICE";
    case VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE:
        return "INSTANCE";
    default:
        return "UNKNOWN";
    }
}

static void *
TrackAllocation(void *userData, size_t size, size_t alignment, VkSystemAllocationScope scope) {
    rtLog("vk",
          "Allocation. Size: %zu, Alignment: %zu, Scope: %s",
          size,
          alignment,
          AllocationScopeToString(scope));
#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    return aligned_alloc(alignment, size);
#endif
}

static void *TrackReallocation(void *userData,
                               void *original,
                               size_t size,
                               size_t alignment,
                               VkSystemAllocationScope scope) {
    rtLog("vk",
          "Reallocation. Size: %zu, Alignment: %zu, Scope: %s",
          size,
          alignment,
          AllocationScopeToString(scope));
    /* Blocks come from _aligned_malloc on Windows and must be reallocated
     * and freed with the matching _aligned_* functions. */
#ifdef _WIN32
    return _aligned_realloc(original, size, alignment);
#else
    return realloc(original, size);
#endif
}

static void TrackFree(void *userData, void *memory) {
#ifdef _WIN32
    _aligned_free(memory);
#else
    free(memory);
#endif
}

static VkBool32 VKAPI_PTR
DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                      VkDebugUtilsMessageTypeFlagsEXT types,
                      const VkDebugUtilsMessengerCallbackDataEXT *callbackData,
                      void *userData) {
    if (severity < VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
        return VK_FALSE;

    const char *severity_str = "<UNKNOWN>";
    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
        severity_str = "WARNING";
    else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
        severity_str = "ERROR";
    rtLog("vk", "[%s] %s", severity_str, callbackData->pMessage);
    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
        RT_DEBUGBREAK;
    return VK_FALSE;
}

extern rt_cvar r_VkPreferredSwapchainImages;
extern rt_cvar r_VkPreferMailboxMode;
extern rt_cvar r_VkMaxPipelineCount;

void RT_RENDERER_API_FN(RegisterCVars)(void) {
    rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
    rtRegisterCVAR(&r_VkPhysDeviceName);
    rtRegisterCVAR(&r_VkPreferredSwapchainImages);
    rtRegisterCVAR(&r_VkPreferMailboxMode);
    rtRegisterCVAR(&r_VkMaxFramesInFlight);
    rtRegisterCVAR(&r_VkMaxPipelineCount);
}

static rt_result CreateInstance(void) {
    VkResult result = volkInitialize();
    if (result != VK_SUCCESS) {
        rtReportError("vk", "Initialization failed: volkInitialize()");
        return 1;
    }

    VkApplicationInfo app_info = {
        .apiVersion = TARGET_API_VERSION,
        .applicationVersion = 0x00001000,
        .engineVersion = 0x00001000,
        .pEngineName = "voyageEngine",
        .pApplicationName = "Voyage",
    };

    const char *extensions[] = {
        VK_KHR_SURFACE_EXTENSION_NAME,
#ifdef _WIN32
        "VK_KHR_win32_surface",
#elif defined(RT_USE_XLIB)
        "VK_KHR_xlib_surface",
#endif

#ifdef RT_DEBUG
        VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
#endif
    };

    const char *layers[1];
    unsigned int layer_count = 0;
#ifdef RT_DEBUG
    /* Search for layers we want to enable */
    uint32_t available_layer_count = 0;
    result = vkEnumerateInstanceLayerProperties(&available_layer_count, NULL);
    if (result == VK_SUCCESS) {
        VkLayerProperties *props = calloc(available_layer_count, sizeof(VkLayerProperties));
        if (props) {
            vkEnumerateInstanceLayerProperties(&available_layer_count, props);
            for (uint32_t i = 0; i < available_layer_count; ++i) {
                if (strcmp(props[i].layerName, "VK_LAYER_KHRONOS_validation") == 0) {
                    layers[0] = "VK_LAYER_KHRONOS_validation";
                    layer_count = 1;
                    break;
                }
            }
            free(props);
        } else {
            rtLog("vk", "Failed to allocate storage for instance layer properties.");
        }
    } else {
        rtLog("vk", "vkEnumerateInstanceLayerProperties failed.");
    }
#endif

    VkInstanceCreateInfo instance_info = {
        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &app_info,
        .ppEnabledExtensionNames = extensions,
        .enabledExtensionCount = RT_ARRAY_COUNT(extensions),
        .ppEnabledLayerNames = layers,
        .enabledLayerCount = layer_count,
    };
    result = vkCreateInstance(&instance_info, g_gpu.alloc_cb, &g_gpu.instance);
    if (result != VK_SUCCESS) {
        rtReportError("vk", "Failed to create the vulkan instance.");
        return 1;
    }
    volkLoadInstance(g_gpu.instance);

#ifdef RT_DEBUG
    /* Create the debug utils messenger */
    VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
        .pfnUserCallback = DebugUtilsMessengerCb,
    };
    vkCreateDebugUtilsMessengerEXT(g_gpu.instance,
                                   &messenger_info,
                                   g_gpu.alloc_cb,
                                   &g_gpu.messenger);
#endif
    return RT_SUCCESS;
}

static rt_result CreateSurface(const rt_renderer_init_info *info) {
#ifdef _WIN32
    g_gpu.native_window.hInstance = info->hInstance;
    g_gpu.native_window.hWnd = info->hWnd;
    VkWin32SurfaceCreateInfoKHR surface_info = {
        .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
        .hinstance = info->hInstance,
        .hwnd = info->hWnd,
    };
    if (vkCreateWin32SurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
        VK_SUCCESS)
        return RT_SUCCESS;
    else
        return 100;
#elif defined(RT_USE_XLIB)
    g_gpu.native_window.display = info->display;
    g_gpu.native_window.window = info->window;
    VkXlibSurfaceCreateInfoKHR surface_info = {
        .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
        .dpy = info->display,
        .window = info->window,
    };
    if (vkCreateXlibSurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
        VK_SUCCESS)
        return RT_SUCCESS;
    else
        return 100;
#endif
}

typedef struct {
    uint32_t graphics;
    uint32_t compute;
    uint32_t present;
    uint32_t transfer;
} rt_queue_indices;

static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
    rt_queue_indices indices = {.graphics = UINT32_MAX,
                                .compute = UINT32_MAX,
                                .present = UINT32_MAX,
                                .transfer = UINT32_MAX};

    uint32_t count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
    VkQueueFamilyProperties *props = calloc(count, sizeof(VkQueueFamilyProperties));
    if (!props) {
        return indices;
    }
    vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, props);
    for (uint32_t i = 0; i < count; ++i) {
        if (props[i].queueCount == 0)
            continue;
        if ((props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
            indices.graphics = i;
        if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
            indices.compute = i;
        if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
            indices.transfer = i;

        VkBool32 present_supported = VK_FALSE;
        vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
        if (present_supported)
            indices.present = i;
    }

    if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
        indices.transfer = indices.graphics;
    else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
        indices.transfer = indices.compute;

    free(props);
    return indices;
}

static bool CheckDeviceExtensionSupported(VkPhysicalDevice phys_dev) {
    const char *required_extensions[] = {
        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    };

    uint32_t extension_count;
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, NULL);

    VkExtensionProperties *supported_extensions =
        calloc(extension_count, sizeof(VkExtensionProperties));
    if (!supported_extensions)
        return false;
    vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, supported_extensions);

    bool supported = true;
    for (uint32_t i = 0; i < RT_ARRAY_COUNT(required_extensions); ++i) {
        bool found = false;
        for (uint32_t j = 0; j < extension_count; ++j) {
            if (strncmp(supported_extensions[j].extensionName,
                        required_extensions[i],
                        VK_MAX_EXTENSION_NAME_SIZE) == 0) {
                found = true;
                break;
            }
        }
        if (!found) {
            supported = false;
            VkPhysicalDeviceProperties props;
            vkGetPhysicalDeviceProperties(phys_dev, &props);
            rtLog("vk",
                  "Device %s does not support the required extension %s",
                  props.deviceName,
                  required_extensions[i]);
            goto out;
        }
    }

out:
    free(supported_extensions);
    return supported;
}

static rt_result ChoosePhysicalDevice(void) {
    g_gpu.phys_device = VK_NULL_HANDLE;
    uint32_t phys_device_count = 0;
    VkResult result = vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, NULL);
    if (result != VK_SUCCESS) {
        rtReportError("vk", "Failed to enumerate the physical devices.");
        return 2;
    }
    VkPhysicalDevice *phys_devices = calloc(phys_device_count, sizeof(VkPhysicalDevice));
    if (!phys_devices) {
        rtReportError("vk", "Failed to enumerate the physical devices: Out of memory.");
        return 2;
    }
    vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, phys_devices);
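
    /* Device selection heuristic: candidates must offer the required
     * extensions, graphics/compute/present queues, synchronization2, dynamic
     * rendering, timeline semaphores and bindless descriptor indexing.
     * Discrete GPUs get +100, the framebuffer area and native non-uniform
     * indexing add more; a name match with r_VkPhysDeviceName wins outright. */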

    uint32_t highscore = 0;
    uint32_t best_index = phys_device_count;
    for (uint32_t i = 0; i < phys_device_count; ++i) {
        VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
        };
        VkPhysicalDeviceSynchronization2Features synchronization2_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
            .pNext = &timeline_semaphore_features,
        };
        VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
            .pNext = &synchronization2_features,
        };
        VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
            .pNext = &dynamic_rendering_features,
        };
        VkPhysicalDeviceFeatures2 features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
            .pNext = &descriptor_indexing_features,
        };
        vkGetPhysicalDeviceFeatures2(phys_devices[i], &features);

        VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
            .pNext = NULL,
        };
        VkPhysicalDeviceProperties2 props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
            .pNext = &descriptor_indexing_props,
        };
        vkGetPhysicalDeviceProperties2(phys_devices[i], &props);

        if (!CheckDeviceExtensionSupported(phys_devices[i]))
            continue;

        rt_queue_indices indices = RetrieveQueueIndices(phys_devices[i], g_gpu.surface);
        if (indices.compute == UINT32_MAX || indices.present == UINT32_MAX ||
            indices.graphics == UINT32_MAX)
            continue;

        if (!synchronization2_features.synchronization2 ||
            !dynamic_rendering_features.dynamicRendering ||
            !timeline_semaphore_features.timelineSemaphore)
            continue;

        /* Check for bindless support */
        if (!descriptor_indexing_features.runtimeDescriptorArray ||
            !descriptor_indexing_features.descriptorBindingPartiallyBound)
            continue;

        uint32_t score = 0;

        if (props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
            score += 100;

        score += (props.properties.limits.maxFramebufferWidth / 100) *
                 (props.properties.limits.maxFramebufferHeight / 100);

        score +=
            (descriptor_indexing_props.shaderStorageBufferArrayNonUniformIndexingNative) ? 100 : 0;
        score +=
            (descriptor_indexing_props.shaderSampledImageArrayNonUniformIndexingNative) ? 100 : 0;

        if (score > highscore) {
            highscore = score;
            best_index = i;
        }

        if (strncmp(props.properties.deviceName,
                    r_VkPhysDeviceName.s,
                    VK_MAX_PHYSICAL_DEVICE_NAME_SIZE) == 0) {
            best_index = i;
            break;
        }
    }
    if (best_index < phys_device_count) {
        g_gpu.phys_device = phys_devices[best_index];

        VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
            .pNext = NULL,
        };
        VkPhysicalDeviceProperties2 props = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
            .pNext = &descriptor_indexing_props,
        };
        VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
        };
        VkPhysicalDeviceFeatures2 features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
            .pNext = &descriptor_indexing_features,
        };
        vkGetPhysicalDeviceFeatures2(phys_devices[best_index], &features);
        vkGetPhysicalDeviceProperties2(phys_devices[best_index], &props);

        g_gpu.phys_device_props = props.properties;
        g_gpu.descriptor_indexing_props = descriptor_indexing_props;
        g_gpu.phys_device_features = features.features;
        g_gpu.descriptor_indexing_features = descriptor_indexing_features;
    }
    free(phys_devices);

    if (g_gpu.phys_device == VK_NULL_HANDLE) {
        rtReportError("vk", "Failed to find a suitable physical device.");
        return 3;
    }
    return RT_SUCCESS;
}

static rt_result CreateDevice(void) {
    const char *extensions[] = {
        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    };

    rt_queue_indices queue_indices = RetrieveQueueIndices(g_gpu.phys_device, g_gpu.surface);

    g_gpu.compute_family = queue_indices.compute;
    g_gpu.graphics_family = queue_indices.graphics;
    g_gpu.present_family = queue_indices.present;
    g_gpu.transfer_family = queue_indices.transfer;

    float priority = 1.f;

    uint32_t distinct_queue_count = 1;
    VkDeviceQueueCreateInfo queue_info[4];
    queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queue_info[0].pNext = NULL;
    queue_info[0].flags = 0;
    queue_info[0].queueCount = 1;
    queue_info[0].queueFamilyIndex = queue_indices.graphics;
    queue_info[0].pQueuePriorities = &priority;
    if (queue_indices.compute != queue_indices.graphics) {
        queue_info[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[1].pNext = NULL;
        queue_info[1].flags = 0;
        queue_info[1].queueCount = 1;
        queue_info[1].queueFamilyIndex = queue_indices.compute;
        queue_info[1].pQueuePriorities = &priority;
        ++distinct_queue_count;
    }
    if (queue_indices.present != queue_indices.graphics &&
        queue_indices.present != queue_indices.compute) {
        queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[distinct_queue_count].pNext = NULL;
        queue_info[distinct_queue_count].flags = 0;
        queue_info[distinct_queue_count].queueCount = 1;
        queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
        queue_info[distinct_queue_count].pQueuePriorities = &priority;
        ++distinct_queue_count;
    }
    /* Also check against present, so a family never appears twice. */
    if (queue_indices.transfer != queue_indices.graphics &&
        queue_indices.transfer != queue_indices.compute &&
        queue_indices.transfer != queue_indices.present) {
        queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queue_info[distinct_queue_count].pNext = NULL;
        queue_info[distinct_queue_count].flags = 0;
        queue_info[distinct_queue_count].queueCount = 1;
        queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
        queue_info[distinct_queue_count].pQueuePriorities = &priority;
        ++distinct_queue_count;
    }
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
    };
    VkPhysicalDeviceSynchronization2Features synchronization2_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
        .pNext = &timeline_semaphore_features,
    };
    VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
        .pNext = &synchronization2_features,
    };
    VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
        .pNext = &dynamic_rendering_features,
    };
    VkPhysicalDeviceFeatures2 features = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
                                          .pNext = &indexing_features};
    vkGetPhysicalDeviceFeatures2(g_gpu.phys_device, &features);

    RT_ASSERT(indexing_features.runtimeDescriptorArray &&
                  indexing_features.descriptorBindingPartiallyBound,
              "We require a device that supports bindless vulkan.");

    VkDeviceCreateInfo device_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext = &features,
        .enabledExtensionCount = RT_ARRAY_COUNT(extensions),
        .ppEnabledExtensionNames = extensions,
        .pQueueCreateInfos = queue_info,
        .queueCreateInfoCount = distinct_queue_count,
    };
    if (vkCreateDevice(g_gpu.phys_device, &device_info, g_gpu.alloc_cb, &g_gpu.device) !=
        VK_SUCCESS) {
        rtReportError("vk", "Device creation failed.");
        return 10;
    }

    vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue);
    vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue);
    vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue);
    vkGetDeviceQueue(g_gpu.device, queue_indices.transfer, 0, &g_gpu.transfer_queue);

    return RT_SUCCESS;
}

static rt_result CreateAllocator(void) {
#define SET_FNC(name) fncs.name = name
#define SET_KHR_FNC(name) (fncs).name##KHR = name
    VmaVulkanFunctions fncs = {NULL};
    SET_FNC(vkGetInstanceProcAddr);
    SET_FNC(vkGetDeviceProcAddr);
    SET_FNC(vkGetPhysicalDeviceProperties);
    SET_FNC(vkGetPhysicalDeviceMemoryProperties);
    SET_FNC(vkAllocateMemory);
    SET_FNC(vkFreeMemory);
    SET_FNC(vkMapMemory);
    SET_FNC(vkUnmapMemory);
    SET_FNC(vkFlushMappedMemoryRanges);
    SET_FNC(vkInvalidateMappedMemoryRanges);
    SET_FNC(vkBindBufferMemory);
    SET_FNC(vkBindImageMemory);
    SET_FNC(vkGetBufferMemoryRequirements);
    SET_FNC(vkGetImageMemoryRequirements);
    SET_FNC(vkCreateBuffer);
    SET_FNC(vkDestroyBuffer);
    SET_FNC(vkCreateImage);
    SET_FNC(vkDestroyImage);
    SET_FNC(vkCmdCopyBuffer);
    SET_KHR_FNC(vkGetBufferMemoryRequirements2);
    SET_KHR_FNC(vkGetImageMemoryRequirements2);
    SET_KHR_FNC(vkBindBufferMemory2);
    SET_KHR_FNC(vkBindImageMemory2);
    SET_KHR_FNC(vkGetPhysicalDeviceMemoryProperties2);
    SET_FNC(vkGetDeviceBufferMemoryRequirements);
    SET_FNC(vkGetDeviceImageMemoryRequirements);
#undef SET_FNC
#undef SET_KHR_FNC

    VmaAllocatorCreateInfo allocator_info = {
        .instance = g_gpu.instance,
        .physicalDevice = g_gpu.phys_device,
        .device = g_gpu.device,
        .pAllocationCallbacks = g_gpu.alloc_cb,
        .vulkanApiVersion = TARGET_API_VERSION,
        .pVulkanFunctions = &fncs,
    };

    return vmaCreateAllocator(&allocator_info, &g_gpu.allocator) == VK_SUCCESS ? RT_SUCCESS
                                                                               : RT_UNKNOWN_ERROR;
}
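
/* Because volk loads every Vulkan entry point at runtime, VMA is handed an
 * explicit function table; with VMA_STATIC_VULKAN_FUNCTIONS and
 * VMA_DYNAMIC_VULKAN_FUNCTIONS both set to 0 it would otherwise have no way
 * to resolve the calls. */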

static void DestroyAllocator(void) {
    vmaDestroyAllocator(g_gpu.allocator);
}

static rt_result CreatePerFrameObjects(void) {
    for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
        VkSemaphoreCreateInfo semaphore_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };
        if (vkCreateSemaphore(g_gpu.device,
                              &semaphore_info,
                              g_gpu.alloc_cb,
                              &g_gpu.frames[i].render_finished) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
        if (vkCreateSemaphore(g_gpu.device,
                              &semaphore_info,
                              g_gpu.alloc_cb,
                              &g_gpu.frames[i].image_available) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
        if (vkCreateSemaphore(g_gpu.device,
                              &semaphore_info,
                              g_gpu.alloc_cb,
                              &g_gpu.frames[i].swapchain_transitioned) != VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
#ifdef RT_DEBUG
        char name[128];
        rtSPrint(name, 128, "Render Finished Semaphore (%u)", i);
        VkDebugUtilsObjectNameInfoEXT name_info = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
            .objectHandle = (uint64_t)g_gpu.frames[i].render_finished,
            .objectType = VK_OBJECT_TYPE_SEMAPHORE,
            .pObjectName = name,
        };
        vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);

        rtSPrint(name, 128, "Image Available Semaphore (%u)", i);
        name_info.objectHandle = (uint64_t)g_gpu.frames[i].image_available;
        vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);

        rtSPrint(name, 128, "Swapchain Transitioned Semaphore (%u)", i);
        name_info.objectHandle = (uint64_t)g_gpu.frames[i].swapchain_transitioned;
        vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
    }
    return RT_SUCCESS;
}

void DestroyPerFrameObjects(void) {
    for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
        vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].image_available, g_gpu.alloc_cb);
        vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].render_finished, g_gpu.alloc_cb);
        vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].swapchain_transitioned, g_gpu.alloc_cb);
    }
}

extern rt_result InitPipelineManagement(void);
extern void ShutdownPipelineManagement(void);
extern rt_result InitRenderTargetManagement(void);
extern void ShutdownRenderTargetManagement(void);
extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
extern rt_result InitializeSempahoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
extern rt_result InitBufferManagement(void);
extern void ShutdownBufferManagement(void);
extern rt_result InitializeTransfers(void);
extern void ShutdownTransfers(void);

rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
    rtLog("vk", "Init");

    _tracking_alloc_cbs.pUserData = NULL;
    _tracking_alloc_cbs.pfnAllocation = TrackAllocation;
    _tracking_alloc_cbs.pfnReallocation = TrackReallocation;
    _tracking_alloc_cbs.pfnFree = TrackFree;

    if (r_VkEnableAPIAllocTracking.i) {
        g_gpu.alloc_cb = &_tracking_alloc_cbs;
    } else {
        g_gpu.alloc_cb = NULL;
    }
    g_gpu.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i,
                                                             RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT,
                                                             RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT);

    int res = CreateInstance();
    if (res != RT_SUCCESS)
        return res;
    res = CreateSurface(info);
    if (res != RT_SUCCESS)
        return res;
    res = ChoosePhysicalDevice();
    if (res != RT_SUCCESS)
        return res;
    res = CreateDevice();
    if (res != RT_SUCCESS)
        return res;
    res = CreateAllocator();
    if (res != RT_SUCCESS)
        return res;
    res = CreatePerFrameObjects();
    if (res != RT_SUCCESS)
        return res;
    res = InitPipelineManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitRenderTargetManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitializeSempahoreManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitCommandBufferManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitBufferManagement();
    if (res != RT_SUCCESS)
        return res;
    res = InitializeTransfers();
    if (res != RT_SUCCESS)
        return res;
    res = rtCreateSwapchain();
    if (res != RT_SUCCESS)
        return res;
    rtUpdateSwapchainRenderTarget();

    return RT_SUCCESS;
}

void RT_RENDERER_API_FN(Shutdown)(void) {
    rtLog("vk", "Shutdown");
    vkDeviceWaitIdle(g_gpu.device);
    rtDestroySwapchain();
    ShutdownTransfers();
    ShutdownBufferManagement();
    ShutdownCommandBufferManagement();
    ShutdownSemaphoreManagement();
    ShutdownRenderTargetManagement();
    ShutdownPipelineManagement();
    DestroyPerFrameObjects();
    DestroyAllocator();
    vkDestroyDevice(g_gpu.device, g_gpu.alloc_cb);
    vkDestroySurfaceKHR(g_gpu.instance, g_gpu.surface, g_gpu.alloc_cb);
#ifdef RT_DEBUG
    vkDestroyDebugUtilsMessengerEXT(g_gpu.instance, g_gpu.messenger, g_gpu.alloc_cb);
#endif
    vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb);
}

unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
    return g_gpu.max_frames_in_flight;
}
@ -1,56 +0,0 @@
if vk_dep.found()
    platform_defs = []
    if get_option('use_xlib')
        platform_defs = ['-DVK_USE_PLATFORM_XLIB_KHR']
    elif host_machine.system() == 'windows'
        platform_defs = ['-DVK_USE_PLATFORM_WIN32_KHR']
    endif

    vk_inc_dep = vk_dep.partial_dependency(compile_args : true, includes : true)
    vk_renderer_lib = library('rtvk',
        # Project Sources
        'command_buffers.h',
        'gpu.h',
        'gpu_sync.h',
        'pipelines.h',
        'render_targets.h',
        'swapchain.h',
        'transfers.h',

        '../common/common_render_graph.h',

        'buffers.c',
        'command_buffers.c',
        'commands.c',
        'frame.c',
        'gpu_sync.c',
        'helper.c',
        'init.c',
        'pipelines.c',
        'render_graph.c',
        'render_targets.c',
        'swapchain.c',
        'transfers.c',

        'simple_sync_impl.cpp',

        '../common/common_render_graph.c',

        # Contrib Sources
        '../../../contrib/volk/volk.h',
        '../../../contrib/volk/volk.c',
        '../../../contrib/vma/vk_mem_alloc.h',
        'vma_impl.cpp',
        dependencies : [m_dep, vk_inc_dep, windowing_dep],
        include_directories : [engine_incdir, contrib_incdir],
        link_with : [runtime_lib],
        c_pch : 'pch/vk_pch.h',
        c_args : platform_defs,
        cpp_pch : 'pch/vk_pch.hpp',
        cpp_args : platform_defs,
        install : true)

    engine_libs += vk_renderer_lib
    engine_lib_paths += vk_renderer_lib.full_path()
endif
@ -1,22 +0,0 @@
#include <volk/volk.h>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#endif

#if defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif

#include <stdlib.h>
#include <string.h>

/* GFX */
#include "gfx/gfx.h"

/* Commonly used runtime headers */
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "runtime/threading.h"
@ -1,3 +0,0 @@
extern "C" {
#include "vk_pch.h"
}
@ -1,186 +0,0 @@
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/resources.h"
#include "runtime/threading.h"

#include "gfx/renderer_api.h"
#include "gfx/effect.h"

#include "gpu.h"
#include "pipelines.h"

#include <stdlib.h>
#include <volk/volk.h>

RT_CVAR_I(r_VkMaxPipelineCount, "Maximum number of pipeline objects. Default: 1024", 1024);

typedef struct rt_pipeline_s {
    uint32_t version;
    rt_pipeline pipeline;
    struct rt_pipeline_s *next_free;
} rt_pipeline_slot;

static rt_pipeline_slot *_pipelines;
static rt_pipeline_slot *_first_free;
static rt_rwlock _lock;

static void DestroyPipeline(rt_pipeline_slot *slot) {
    if (slot->pipeline.pipeline) {
        vkDestroyPipeline(g_gpu.device, slot->pipeline.pipeline, g_gpu.alloc_cb);
    }
    slot->next_free = _first_free;
    _first_free = slot;
}

static VkShaderModule CreateShaderModuleFromResource(rt_resource_id rid) {
    if (rid == RT_INVALID_RESOURCE_ID)
        return VK_NULL_HANDLE;
    rt_resource *resource = NULL;
    size_t size = rtGetResourceSize(rid);
    if (size == 0)
        return VK_NULL_HANDLE;

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return VK_NULL_HANDLE;

    VkShaderModule module = VK_NULL_HANDLE;
    resource = rtArenaPush(temp.arena, size);
    if (!resource) {
        rtLog("VK", "Failed to allocate temporary memory for retrieving a shader resource");
        goto out;
    }

    if (rtGetResource(rid, resource) != RT_SUCCESS) {
        goto out;
    }
    if (resource->type != RT_RESOURCE_SHADER) {
        rtLog("VK", "Attempted to create a shader module from a non-shader resource %llx", rid);
        goto out;
    }
    rt_shader_info *info = resource->data;
    if (!info) {
        rtLog("VK", "Shader resource %llx has no attached shader_info", rid);
        goto out;
    }

    VkShaderModuleCreateInfo module_info = {.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
                                            .pCode = rtResolveRelptr(&info->bytecode),
                                            .codeSize = info->bytecode_length};
    if (vkCreateShaderModule(g_gpu.device, &module_info, g_gpu.alloc_cb, &module) != VK_SUCCESS) {
        rtLog("VK", "Failed to create the shader module from resource %llx", rid);
        goto out;
    }
out:
    rtReturnTemporaryArena(temp);
    return module;
}
|
|
||||||
|
|
||||||
static bool CreateComputePipeline(VkShaderModule compute_shader,
|
|
||||||
const rt_pipeline_info *info,
|
|
||||||
rt_pipeline_slot *slot) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool CreateGraphicsPipeline(VkShaderModule vertex_shader,
|
|
||||||
VkShaderModule fragment_shader,
|
|
||||||
const rt_pipeline_info *info,
|
|
||||||
rt_pipeline_slot *slot) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_result InitPipelineManagement(void) {
|
|
||||||
rt_create_rwlock_result lock_res = rtCreateRWLock();
|
|
||||||
if (!lock_res.ok)
|
|
||||||
return RT_UNKNOWN_ERROR;
|
|
||||||
_lock = lock_res.lock;
|
|
||||||
|
|
||||||
_pipelines = calloc(r_VkMaxPipelineCount.i, sizeof(rt_pipeline_slot));
|
|
||||||
if (!_pipelines) {
|
|
||||||
rtDestroyRWLock(&_lock);
|
|
||||||
return RT_OUT_OF_MEMORY;
|
|
||||||
}
|
|
||||||
/* Keep [0] unused to preserve 0 as the invalid handle */
|
|
||||||
_first_free = &_pipelines[1];
|
|
||||||
for (int i = 1; i < r_VkMaxPipelineCount.i - 1; ++i) {
|
|
||||||
_pipelines[i].next_free = &_pipelines[i + 1];
|
|
||||||
}
|
|
||||||
return RT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ShutdownPipelineManagement(void) {
|
|
||||||
for (int i = 1; i < r_VkMaxPipelineCount.i; ++i) {
|
|
||||||
DestroyPipeline(&_pipelines[i]);
|
|
||||||
}
|
|
||||||
free(_pipelines);
|
|
||||||
rtDestroyRWLock(&_lock);
|
|
||||||
_first_free = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
|
|
||||||
rt_pipeline_handle handle = RT_INVALID_HANDLE;
|
|
||||||
rtLockWrite(&_lock);
|
|
||||||
if (!_first_free) {
|
|
||||||
rtLog("VK", "No free pipeline slots!");
|
|
||||||
rtUnlockWrite(&_lock);
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
rt_pipeline_slot *slot = _first_free;
|
|
||||||
_first_free = slot->next_free;
|
|
||||||
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
|
|
||||||
|
|
||||||
/* No other thread that calls compile gets the same slot.
|
|
||||||
* Another thread accessing the slot via GetPipeline would get a version mismatch.
|
|
||||||
* The same holds for DestroyPipeline
|
|
||||||
*/
|
|
||||||
rtUnlockWrite(&_lock);
|
|
||||||
|
|
||||||
VkShaderModule vertex_shader = CreateShaderModuleFromResource(info->vertex_shader);
|
|
||||||
VkShaderModule fragment_shader = CreateShaderModuleFromResource(info->fragment_shader);
|
|
||||||
VkShaderModule compute_shader = CreateShaderModuleFromResource(info->compute_shader);
|
|
||||||
|
|
||||||
RT_UNUSED(vertex_shader);
|
|
||||||
RT_UNUSED(fragment_shader);
|
|
||||||
RT_UNUSED(compute_shader);
|
|
||||||
|
|
||||||
bool create_success = false;
|
|
||||||
if (compute_shader) {
|
|
||||||
create_success = CreateComputePipeline(compute_shader, info, slot);
|
|
||||||
} else if (vertex_shader && fragment_shader) {
|
|
||||||
create_success = CreateGraphicsPipeline(vertex_shader, fragment_shader, info, slot);
|
|
||||||
} else {
|
|
||||||
rtLog("VK", "Invalid combination of shaders in pipeline info.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (create_success) {
|
|
||||||
handle.version = slot->version;
|
|
||||||
handle.index = (uint32_t)(slot - _pipelines);
|
|
||||||
}
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
|
|
||||||
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
|
|
||||||
return;
|
|
||||||
rtLockWrite(&_lock);
|
|
||||||
if (_pipelines[handle.index].version == handle.version)
|
|
||||||
DestroyPipeline(&_pipelines[handle.index]);
|
|
||||||
else
|
|
||||||
rtLog("VK", "Tried to destroy a pipeline using an outdated handle.");
|
|
||||||
rtUnlockWrite(&_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
|
|
||||||
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
|
|
||||||
return NULL;
|
|
||||||
rtLockRead(&_lock);
|
|
||||||
rt_pipeline *res = NULL;
|
|
||||||
if (_pipelines[handle.index].version == handle.version)
|
|
||||||
res = &_pipelines[handle.index].pipeline;
|
|
||||||
else
|
|
||||||
rtLog("VK", "Tried to access a pipeline using an outdated handle.");
|
|
||||||
rtUnlockRead(&_lock);
|
|
||||||
return res;
|
|
||||||
}
|
|
@ -1,15 +0,0 @@
#ifndef RT_VK_PIPELINES_H
#define RT_VK_PIPELINES_H

#include <volk/volk.h>

#include "gfx/renderer_api.h"

typedef struct {
    VkPipeline pipeline;
} rt_pipeline;

/* A pipeline is immutable after creation. */
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);

#endif
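
The removed pipeline manager (and the render-target manager further down) both rely on the same generational-handle pattern: every slot carries a version counter that advances each time the slot is reused, so a lookup through an old handle fails instead of silently returning the slot's new occupant. A minimal self-contained sketch of that pattern, using illustrative names rather than the engine's API:

#include <stddef.h>
#include <stdint.h>

#define MAX_VERSION 0xFFu /* stand-in for RT_RENDER_BACKEND_HANDLE_MAX_VERSION */

typedef struct { uint32_t index, version; } handle;
typedef struct { uint32_t version; int payload; } slot;

/* Reusing a slot bumps its version; a handle snapshots the version at creation. */
static handle alloc_handle(slot *s, uint32_t index) {
    s[index].version = (s[index].version + 1) & MAX_VERSION;
    return (handle){.index = index, .version = s[index].version};
}

/* A stale handle (slot reused since it was issued) fails the version check. */
static int *lookup(slot *s, size_t count, handle h) {
    if (h.index == 0 || h.index >= count)
        return NULL; /* slot 0 is reserved as the invalid handle */
    if (s[h.index].version != h.version)
        return NULL; /* outdated handle */
    return &s[h.index].payload;
}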
@ -1,27 +0,0 @@
#include "gpu.h"

#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"

#include "../common/common_render_graph.h"
#include "render_targets.h"

static int RequireExplicitSynchronization(void) {
    return 1;
}

rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
    rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = rtCreateRenderTarget,
                                                      .RequireExplicitSynchronization =
                                                          RequireExplicitSynchronization};
    return rtCreateRenderGraphBuilder(&cbs);
}

void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
    rtDestroyRenderGraphBuilder(builder);
}

rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
    RT_NOT_IMPLEMENTED;
    return RT_UNKNOWN_ERROR;
}
@ -1,400 +0,0 @@
#include "runtime/config.h"
#include "runtime/threading.h"

#include "gfx/renderer_api.h"

#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"

#include <stdlib.h>
#include <volk/volk.h>

RT_CVAR_I(r_VkMaxRenderTargetCount, "Maximum number of render target objects. Default: 1024", 1024);

typedef struct rt_render_target_slot_s {
    uint32_t version;
    rt_render_target render_target;
    struct rt_render_target_slot_s *next_free;
} rt_render_target_slot;

static rt_render_target_slot *_render_targets;
static rt_render_target_slot *_first_free;
static rt_rwlock _lock;

static rt_render_target_handle _swapchain_handle;

static void DestroyRenderTarget(rt_render_target_slot *slot) {
    for (unsigned int i = 0; i < slot->render_target.image_count; ++i) {
        vkDestroyImageView(g_gpu.device, slot->render_target.view[i], g_gpu.alloc_cb);
        vmaDestroyImage(g_gpu.allocator,
                        slot->render_target.image[i],
                        slot->render_target.allocation[i]);
    }
    slot->next_free = _first_free;
    _first_free = slot;
}

static bool CreateImageAndView(VkExtent2D extent,
                               VkFormat format,
                               VkSampleCountFlagBits sample_count,
                               VkImageUsageFlagBits usage,
                               VkImageAspectFlagBits aspect,
                               VkImage *p_image,
                               VmaAllocation *p_allocation,
                               VkImageView *p_view,
                               const char *rt_name,
                               uint32_t image_index) {

    uint32_t queue_families[3];
    uint32_t distinct_queue_families = 1;
    queue_families[0] = g_gpu.graphics_family;
    if (g_gpu.compute_family != g_gpu.graphics_family)
        queue_families[distinct_queue_families++] = g_gpu.compute_family;
    if (g_gpu.present_family != g_gpu.graphics_family &&
        g_gpu.present_family != g_gpu.compute_family)
        queue_families[distinct_queue_families++] = g_gpu.present_family;

    VkFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
    };
    vkGetPhysicalDeviceFormatProperties2(g_gpu.phys_device, format, &props);
    if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) == 0) {
        rtLog("vk",
              "Requested render target format %s cannot be sampled.",
              rtVkFormatToString(format));
        usage &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
    }
    if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) == 0) {
        rtLog("vk",
              "Requested render target format %s cannot be used for storage.",
              rtVkFormatToString(format));
        usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
    }
    if (((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0) &&
        ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) ==
         0)) {
        rtReportError(
            "vk",
            "Tried to create a render target color attachment, but the format %s does not "
            "support the color attachment usage.",
            rtVkFormatToString(format));
        return false;
    } else if (((usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0) &&
               ((props.formatProperties.optimalTilingFeatures &
                 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) == 0)) {
        rtReportError("vk",
                      "Tried to create a render target depth/stencil attachment, but the format %s "
                      "does not support the depth/stencil attachment usage.",
                      rtVkFormatToString(format));
        return false;
    }

    VkImageCreateInfo image_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType = VK_IMAGE_TYPE_2D,
        .format = format,
        .extent = {.width = extent.width, .height = extent.height, .depth = 1},
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = sample_count,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = usage,
        .sharingMode =
            (distinct_queue_families > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
        .pQueueFamilyIndices = (distinct_queue_families > 1) ? queue_families : NULL,
        .queueFamilyIndexCount = distinct_queue_families,
    };
    VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_GPU_ONLY,
    };

    VkImage image;
    VmaAllocation allocation;
    if (vmaCreateImage(g_gpu.allocator, &image_info, &alloc_info, &image, &allocation, NULL) !=
        VK_SUCCESS) {
        return false;
    }

    VkImageViewCreateInfo view_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .image = image,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = format,
        .components = {.r = VK_COMPONENT_SWIZZLE_IDENTITY,
                       .g = VK_COMPONENT_SWIZZLE_IDENTITY,
                       .b = VK_COMPONENT_SWIZZLE_IDENTITY,
                       .a = VK_COMPONENT_SWIZZLE_IDENTITY},
        /* clang-format off */
        .subresourceRange = {
            .aspectMask = aspect,
            .baseArrayLayer = 0,
            .baseMipLevel = 0,
            .layerCount = 1,
            .levelCount = 1,
        },
        /* clang-format on */
    };
    VkImageView view;
    if (vkCreateImageView(g_gpu.device, &view_info, g_gpu.alloc_cb, &view) != VK_SUCCESS) {
        rtLog("VK", "Failed to create render target image view");
        vmaDestroyImage(g_gpu.allocator, image, allocation);
        return false;
    }

#ifdef RT_DEBUG
    char name[260];
    rtSPrint(name, 260, "%s (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
    VkDebugUtilsObjectNameInfoEXT name_info = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
        .objectHandle = (uint64_t)image,
        .pObjectName = name,
        .objectType = VK_OBJECT_TYPE_IMAGE};
    vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);

    rtSPrint(name, 260, "%s [view] (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
    name_info =
        (VkDebugUtilsObjectNameInfoEXT){.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
                                        .objectHandle = (uint64_t)view,
                                        .pObjectName = name,
                                        .objectType = VK_OBJECT_TYPE_IMAGE_VIEW};
    vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif

    *p_image = image;
    *p_allocation = allocation;
    *p_view = view;
    return true;
}

rt_result InitRenderTargetManagement(void) {
    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok)
        return RT_UNKNOWN_ERROR;
    _lock = lock_res.lock;

    _render_targets = calloc(r_VkMaxRenderTargetCount.i, sizeof(rt_render_target_slot));
    if (!_render_targets) {
        rtDestroyRWLock(&_lock);
        return RT_OUT_OF_MEMORY;
    }
    /* Keep [0] unused to preserve 0 as the invalid handle */
    _first_free = &_render_targets[1];
    for (int i = 1; i < r_VkMaxRenderTargetCount.i - 1; ++i) {
        _render_targets[i].next_free = &_render_targets[i + 1];
    }

    /* Reserve the slot for the swap chain rt */
    rt_render_target_slot *slot = _first_free;
    _first_free = slot->next_free;
    slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
    _swapchain_handle = (rt_render_target_handle){.version = slot->version,
                                                  .index = (uint32_t)(slot - _render_targets)};

    return RT_SUCCESS;
}

void ShutdownRenderTargetManagement(void) {
    for (int i = 1; i < r_VkMaxRenderTargetCount.i; ++i) {
        DestroyRenderTarget(&_render_targets[i]);
    }
    free(_render_targets);
    rtDestroyRWLock(&_lock);
    _first_free = NULL;
}

rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info) {
    rt_render_target_handle handle = {0};

    rtLockWrite(&_lock);
    if (!_first_free) {
        rtLog("VK", "No free render target slots!");
        rtUnlockWrite(&_lock);
        return handle;
    }
    rt_render_target_slot *slot = _first_free;
    _first_free = slot->next_free;
    slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;

    /* No other thread that creates a render target gets the same slot.
     * Another thread accessing the slot via rtGetRenderTarget would get a version mismatch.
     * The same holds for rtDestroyRenderTarget.
     */
    rtUnlockWrite(&_lock);

    const char *name = info->name;

    slot->render_target.match_swapchain = 0;
    slot->render_target.image_count = g_swapchain.image_count;
    for (unsigned int i = 0; i < g_swapchain.image_count; ++i) {
        uint32_t width = info->width, height = info->height;
        if (width == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
            width = g_swapchain.extent.width;
            slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
        }
        if (height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
            height = g_swapchain.extent.height;
            slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
        }
        slot->render_target.extent = (VkExtent2D){.width = width, .height = height};

        if (info->format != RT_PIXEL_FORMAT_SWAPCHAIN)
            slot->render_target.format = rtPixelFormatToVkFormat(info->format);
        else {
            slot->render_target.format = g_swapchain.format;
            slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT;
        }
        if (info->format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 ||
            info->format == RT_PIXEL_FORMAT_DEPTH32) {
            slot->render_target.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
                                        VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
            if (info->format == RT_PIXEL_FORMAT_DEPTH32)
                slot->render_target.aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
            else
                slot->render_target.aspect =
                    VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
        } else {
            slot->render_target.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                                        VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
            slot->render_target.aspect = VK_IMAGE_ASPECT_COLOR_BIT;
        }
        slot->render_target.sample_count = rtSampleCountToFlags(info->samples);
        if (!CreateImageAndView(slot->render_target.extent,
                                slot->render_target.format,
                                slot->render_target.sample_count,
                                slot->render_target.usage,
                                slot->render_target.aspect,
                                &slot->render_target.image[i],
                                &slot->render_target.allocation[i],
                                &slot->render_target.view[i],
                                name,
                                i)) {
            slot->render_target.image_count = i;
            DestroyRenderTarget(slot);
            goto out;
        }
        slot->render_target.states[i] = RT_RENDER_TARGET_STATE_INVALID;
    }

    handle.version = slot->version;
    handle.index = (uint32_t)(slot - _render_targets);
out:
    return handle;
}

void rtDestroyRenderTarget(rt_render_target_handle handle) {
    if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
        return;
    rtLockWrite(&_lock);
    if (_render_targets[handle.index].version == handle.version)
        DestroyRenderTarget(&_render_targets[handle.index]);
    else
        rtLog("VK", "Tried to destroy a render target using an outdated handle.");
    rtUnlockWrite(&_lock);
}

rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
    if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
        return NULL;
    rtLockRead(&_lock);
    rt_render_target *res = NULL;
    if (_render_targets[handle.index].version == handle.version)
        res = &_render_targets[handle.index].render_target;
    else
        rtLog("VK", "Tried to access a render target using an outdated handle.");
    rtUnlockRead(&_lock);
    return res;
}

rt_render_target_handle rtGetSwapchainRenderTarget(void) {
    return _swapchain_handle;
}

void rtUpdateSwapchainRenderTarget(void) {
    RT_ASSERT(_swapchain_handle.index != 0, "Invalid swap chain render target!");
    rt_render_target_slot *slot = &_render_targets[_swapchain_handle.index];
    rt_render_target *rt = &slot->render_target;

    rt->match_swapchain = 0;
    rt->format = g_swapchain.format;
    rt->extent = g_swapchain.extent;
    rt->sample_count = 1;
    rt->usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    rt->aspect = VK_IMAGE_ASPECT_COLOR_BIT;
    for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
        rt->allocation[i] = NULL;
        rt->image[i] = g_swapchain.images[i];
        rt->view[i] = g_swapchain.image_views[i];
        rt->states[i] = RT_RENDER_TARGET_STATE_INVALID;
    }
}

void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent) {
    rtLockWrite(&_lock);
    for (uint32_t i = 1; i < (uint32_t)r_VkMaxRenderTargetCount.i; ++i) {
        if (_render_targets[i].render_target.image_count == 0)
            continue;

        rt_render_target *render_target = &_render_targets[i].render_target;
        if (render_target->match_swapchain != 0) {
            for (uint32_t j = 0; j < render_target->image_count; ++j) {
                vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
                vmaDestroyImage(g_gpu.allocator,
                                render_target->image[j],
                                render_target->allocation[j]);
            }
            /* Both match flags can be set at once, so update format and size
             * independently instead of treating them as exclusive. */
            if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT) != 0) {
                render_target->format = format;
            }
            if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE) != 0) {
                render_target->extent = extent;
            }
            for (uint32_t j = 0; j < image_count; ++j) {
                if (!CreateImageAndView(render_target->extent,
                                        render_target->format,
                                        render_target->sample_count,
                                        render_target->usage,
                                        render_target->aspect,
                                        &render_target->image[j],
                                        &render_target->allocation[j],
                                        &render_target->view[j],
                                        NULL,
                                        j)) {
                    render_target->image_count = j;
                    DestroyRenderTarget(&_render_targets[i]);
                    rtReportError("VK", "Failed to recreate swapchain-matching render target");
                    break;
                }
            }
        } else if (render_target->image_count < image_count) {
            /* Create additional images */
            for (uint32_t j = render_target->image_count; j < image_count; ++j) {
                if (!CreateImageAndView(render_target->extent,
                                        render_target->format,
                                        render_target->sample_count,
                                        render_target->usage,
                                        render_target->aspect,
                                        &render_target->image[j],
                                        &render_target->allocation[j],
                                        &render_target->view[j],
                                        NULL,
                                        j)) {
                    render_target->image_count = j;
                    DestroyRenderTarget(&_render_targets[i]);
                    rtReportError("VK", "Failed to create additional render target images");
                    break;
                }
            }
        } else if (render_target->image_count > image_count) {
            /* Delete unnecessary images */
            for (uint32_t j = image_count; j < render_target->image_count; ++j) {
                vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
                vmaDestroyImage(g_gpu.allocator,
                                render_target->image[j],
                                render_target->allocation[j]);
            }
        }
        render_target->image_count = image_count;
    }
    rtUnlockWrite(&_lock);
}
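
To illustrate how the swapchain-matching flags above get set, this is roughly how a caller would request a render target that tracks the swap chain. A sketch only: it assumes rt_physical_render_target_info carries exactly the name/width/height/format/samples fields the code above reads.

rt_physical_render_target_info info = {
    .name = "scene_color",                    /* illustrative name */
    .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN, /* sets ..._MATCH_SWAPCHAIN_SIZE */
    .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
    .format = RT_PIXEL_FORMAT_SWAPCHAIN,      /* sets ..._MATCH_SWAPCHAIN_FORMAT */
    .samples = 1,
};
rt_render_target_handle rt = rtCreateRenderTarget(&info);
if (rt.index == 0) {
    /* index 0 is the reserved invalid slot, so a zero handle signals failure */
}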
@ -1,44 +0,0 @@
#ifndef RT_VK_RENDER_TARGETS_H
#define RT_VK_RENDER_TARGETS_H

#include "gpu.h"
#include "gfx/renderer_api.h"

#include "../common/common_render_graph.h"

/* Must match RT_VK_MAX_SWAPCHAIN_IMAGES */
#define RT_VK_RENDER_TARGET_MAX_IMAGES 3

typedef enum {
    RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE = 0x01,
    RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT = 0x02,
} rt_render_target_match_swapchain_flags;

typedef struct {
    VkImage image[RT_VK_RENDER_TARGET_MAX_IMAGES];
    VkImageView view[RT_VK_RENDER_TARGET_MAX_IMAGES];
    VmaAllocation allocation[RT_VK_RENDER_TARGET_MAX_IMAGES];
    rt_render_target_state states[RT_VK_RENDER_TARGET_MAX_IMAGES];
    VkSampleCountFlagBits sample_count;
    VkFormat format;
    VkExtent2D extent;
    VkImageUsageFlagBits usage;
    VkImageAspectFlags aspect;
    unsigned int image_count;
    rt_render_target_match_swapchain_flags match_swapchain;
} rt_render_target;

rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info);
void rtDestroyRenderTarget(rt_render_target_handle handle);

rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_render_target_handle rtGetSwapchainRenderTarget(void);

/* Update the render target that represents the swap chain */
void rtUpdateSwapchainRenderTarget(void);

/* Update render targets that match the swap chain */
void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent);

#endif
@ -1,80 +0,0 @@
#ifndef RT_VK_RESOURCES_H
#define RT_VK_RESOURCES_H

/* Buffers and images */

#include "gpu.h"

#include "runtime/threading.h"

typedef enum {
    RT_BUFFER_STATE_INVALID,
    RT_BUFFER_STATE_NOT_USED,
    RT_BUFFER_STATE_IN_USE,
    RT_BUFFER_STATE_IN_TRANSFER,
} rt_buffer_state;

typedef struct {
    VkBuffer buffer;
    VmaAllocation allocation;
    size_t size;
    rt_buffer_usage usage;
    rt_buffer_type type;
    rt_buffer_state state;
    rt_rwlock lock;

    bool mappable;
    bool coherent;

    rt_gpu_queue owner;
} rt_buffer;

rt_buffer *rtGetBuffer(rt_buffer_handle handle);

/* Helper functions for accessing buffers */

RT_INLINE rt_gpu_queue rtGetBufferOwner(rt_buffer_handle handle) {
    rt_buffer *buffer = rtGetBuffer(handle);
    rt_gpu_queue owner = RT_VK_UNOWNED;
    if (buffer) {
        rtLockRead(&buffer->lock);
        owner = buffer->owner;
        rtUnlockRead(&buffer->lock);
    }
    return owner;
}

RT_INLINE void rtSetBufferOwner(rt_buffer_handle handle, rt_gpu_queue owner) {
    rt_buffer *buffer = rtGetBuffer(handle);
    if (buffer) {
        rtLockWrite(&buffer->lock);
        buffer->owner = owner;
        rtUnlockWrite(&buffer->lock);
    }
}

RT_INLINE rt_buffer_state rtGetBufferState(rt_buffer_handle handle) {
    rt_buffer *buffer = rtGetBuffer(handle);
    rt_buffer_state state = RT_BUFFER_STATE_INVALID;
    if (buffer) {
        rtLockRead(&buffer->lock);
        state = buffer->state;
        rtUnlockRead(&buffer->lock);
    }
    return state;
}

RT_INLINE void rtSetBufferState(rt_buffer_handle handle, rt_buffer_state state) {
    rt_buffer *buffer = rtGetBuffer(handle);
    if (buffer) {
        rtLockWrite(&buffer->lock);
        buffer->state = state;
        rtUnlockWrite(&buffer->lock);
    }
}

#endif
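
A hypothetical caller-side sketch of how these lock-guarded helpers would coordinate an upload; RT_TRANSFER_QUEUE comes from the transfer code further down, the wrapper itself is illustrative:

/* Mark a buffer for upload unless the GPU may still be reading it.
 * Sketch only; real code would also record the actual copy. */
static bool BeginBufferUpload(rt_buffer_handle handle) {
    if (rtGetBufferState(handle) == RT_BUFFER_STATE_IN_USE)
        return false; /* still referenced by in-flight work */
    rtSetBufferState(handle, RT_BUFFER_STATE_IN_TRANSFER);
    rtSetBufferOwner(handle, RT_TRANSFER_QUEUE);
    return true;
}

Note that each helper takes and releases the buffer's lock separately, so the check and the two updates above are not atomic; a race-free version would need a single locked check-and-set.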
@ -1,6 +0,0 @@
#include "gpu.h"

extern "C" {
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
}
@ -1,205 +0,0 @@
#define RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
#include "swapchain.h"
#include "gpu.h"

#include "runtime/config.h"

#include <stdlib.h>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#elif defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif

RT_CVAR_I(r_VkPreferredSwapchainImages,
          "Preferred number of swapchain images. [2/3] Default: 2",
          2);
RT_CVAR_I(r_VkPreferMailboxMode, "Prefer mailbox present mode over fifo mode. [0/1] Default: 1", 1);

typedef struct {
    VkPresentModeKHR present_mode;
    VkSurfaceFormatKHR surface_format;
    VkExtent2D extent;
    VkSurfaceTransformFlagsKHR pre_transform;
} rt_device_swapchain_parameters;

static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
    rt_device_swapchain_parameters params;

    /* determine presentation mode. FIFO should always be available.
     * TODO: If vsync is enabled, we should always choose FIFO.
     */
    params.present_mode = VK_PRESENT_MODE_FIFO_KHR;
    if (r_VkPreferMailboxMode.i) {
        VkPresentModeKHR modes[6];
        uint32_t count = 6;
        vkGetPhysicalDeviceSurfacePresentModesKHR(g_gpu.phys_device, g_gpu.surface, &count, modes);
        for (uint32_t i = 0; i < count; ++i) {
            if (modes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
                params.present_mode = VK_PRESENT_MODE_MAILBOX_KHR;
        }
    }

    /* Determine surface format */
    VkSurfaceFormatKHR formats[64];
    uint32_t format_count = 64;
    vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, NULL);
    vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, formats);
    params.surface_format = formats[0];
    for (uint32_t i = 0; i < format_count; ++i) {
        if (formats[i].format == VK_FORMAT_B8G8R8A8_SRGB &&
            formats[i].colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
            params.surface_format = formats[i];
            break;
        }
    }

    /* get extent */
    VkSurfaceCapabilitiesKHR capabilities;
    vkGetPhysicalDeviceSurfaceCapabilitiesKHR(g_gpu.phys_device, g_gpu.surface, &capabilities);
    if (capabilities.currentExtent.width != UINT32_MAX) {
        params.extent = capabilities.currentExtent;
    } else {
#ifdef _WIN32
        RECT client_area;
        GetClientRect(g_gpu.native_window.hWnd, &client_area);
        params.extent.width = (uint32_t)client_area.right;
        params.extent.height = (uint32_t)client_area.bottom;
#else
        XWindowAttributes attribs;
        XGetWindowAttributes(g_gpu.native_window.display, g_gpu.native_window.window, &attribs);
        params.extent.width = (uint32_t)attribs.width;
        params.extent.height = (uint32_t)attribs.height;
#endif
    }
    params.pre_transform = capabilities.currentTransform;

    return params;
}

rt_swapchain g_swapchain;

rt_result rtCreateSwapchain(void) {
    rt_device_swapchain_parameters device_params = DetermineSwapchainParameters();

    uint32_t image_count = r_VkPreferredSwapchainImages.i;
    if (image_count < 2)
        image_count = 2;
    else if (image_count > 3)
        image_count = 3;

    VkSwapchainCreateInfoKHR swapchain_info = {
        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
        .surface = g_gpu.surface,
        .presentMode = device_params.present_mode,
        .imageFormat = device_params.surface_format.format,
        .imageColorSpace = device_params.surface_format.colorSpace,
        .imageExtent = device_params.extent,
        .preTransform = device_params.pre_transform,
        .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
        .clipped = VK_TRUE,
        .minImageCount = image_count,
        .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
        .imageArrayLayers = 1,
        .oldSwapchain = VK_NULL_HANDLE,
    };
    uint32_t queue_families[] = {g_gpu.graphics_family, g_gpu.present_family};
    if (g_gpu.present_family != g_gpu.graphics_family) {
        swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
        swapchain_info.pQueueFamilyIndices = queue_families;
        swapchain_info.queueFamilyIndexCount = 2;
    } else {
        swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
        swapchain_info.pQueueFamilyIndices = NULL;
        swapchain_info.queueFamilyIndexCount = 0;
    }

    if (vkCreateSwapchainKHR(g_gpu.device,
                             &swapchain_info,
                             g_gpu.alloc_cb,
                             &g_swapchain.swapchain) != VK_SUCCESS) {
        rtReportError("vk", "Failed to create the swapchain");
        return 50;
    }
    g_swapchain.format = device_params.surface_format.format;
    g_swapchain.extent = device_params.extent;

    /* Retrieve images */
    g_swapchain.image_count = 0;
    vkGetSwapchainImagesKHR(g_gpu.device, g_swapchain.swapchain, &g_swapchain.image_count, NULL);
    if (g_swapchain.image_count > RT_VK_MAX_SWAPCHAIN_IMAGES) {
        rtReportError("vk", "Unsupported number of swapchain images: %u", g_swapchain.image_count);
        return 51;
    }
    vkGetSwapchainImagesKHR(g_gpu.device,
                            g_swapchain.swapchain,
                            &g_swapchain.image_count,
                            g_swapchain.images);

    /* Create image views */
    for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
        VkImageViewCreateInfo view_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = g_swapchain.images[i],
            .format = g_swapchain.format,
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .components =
                {
                    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
                    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
                    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
                    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
                },
            .subresourceRange =
                {
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                    .baseMipLevel = 0,
                    .levelCount = 1,
                },
        };
        if (vkCreateImageView(g_gpu.device,
                              &view_info,
                              g_gpu.alloc_cb,
                              &g_swapchain.image_views[i]) != VK_SUCCESS) {
            rtReportError("vk", "Failed to create an image view for the swapchain.");
            return 52;
        }
    }

    /* Create fences */
    for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
        VkFenceCreateInfo fence_info = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
            /* Create as signalled so that we can wait on it the first time we render to that
               swapchain image. */
            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
        };
        if (vkCreateFence(g_gpu.device,
                          &fence_info,
                          g_gpu.alloc_cb,
                          &g_swapchain.image_fences[i]) != VK_SUCCESS) {
            rtReportError("vk", "Failed to create a fence for the swapchain");
            return 53;
        }
    }

    return RT_SUCCESS;
}

rt_result rtRecreateSwapchain(void) {
    /* TODO(Kevin): Old swapchain in swapchain create info */
    rtDestroySwapchain();
    return rtCreateSwapchain();
}

void rtDestroySwapchain(void) {
    for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
        vkDestroyFence(g_gpu.device, g_swapchain.image_fences[i], g_gpu.alloc_cb);
        vkDestroyImageView(g_gpu.device, g_swapchain.image_views[i], g_gpu.alloc_cb);
    }
    vkDestroySwapchainKHR(g_gpu.device, g_swapchain.swapchain, g_gpu.alloc_cb);
}
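
The TODO in DetermineSwapchainParameters notes that vsync should force FIFO. A minimal sketch of what that selection could look like, with a hypothetical vsync flag standing in for whatever cvar the engine would actually expose:

static VkPresentModeKHR ChoosePresentMode(bool vsync_enabled, bool prefer_mailbox) {
    /* FIFO is the only mode the spec guarantees, and it is the vsync mode. */
    if (vsync_enabled || !prefer_mailbox)
        return VK_PRESENT_MODE_FIFO_KHR;

    VkPresentModeKHR modes[6];
    uint32_t count = 6;
    vkGetPhysicalDeviceSurfacePresentModesKHR(g_gpu.phys_device, g_gpu.surface, &count, modes);
    for (uint32_t i = 0; i < count; ++i) {
        if (modes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
            return VK_PRESENT_MODE_MAILBOX_KHR;
    }
    return VK_PRESENT_MODE_FIFO_KHR;
}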
@ -1,30 +0,0 @@
#ifndef RT_VK_SWAPCHAIN_H
#define RT_VK_SWAPCHAIN_H

#include <volk/volk.h>

#include "runtime/runtime.h"

#define RT_VK_MAX_SWAPCHAIN_IMAGES 3

typedef struct {
    VkSwapchainKHR swapchain;
    VkImage images[RT_VK_MAX_SWAPCHAIN_IMAGES];
    VkImageView image_views[RT_VK_MAX_SWAPCHAIN_IMAGES];
    VkFence image_fences[RT_VK_MAX_SWAPCHAIN_IMAGES];
    uint32_t image_count;
    VkFormat format;
    VkExtent2D extent;
} rt_swapchain;

#ifndef RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
extern rt_swapchain g_swapchain;
#endif

rt_result rtCreateSwapchain(void);

rt_result rtRecreateSwapchain(void);

void rtDestroySwapchain(void);

#endif
@ -1,263 +0,0 @@
#include "transfers.h"
#include "command_buffers.h"

#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"

#include <stdbool.h>
#include <stdlib.h> /* calloc/free */
#include <string.h> /* memcpy */

RT_CVAR_I(rt_VkTransferSlotCount,
          "Number of available transfer slots per frame. Default: 512",
          512);

/* This is a temporary solution. We probably should keep a pool of buffers
 * to avoid re-creating the buffers all the time. */

typedef struct {
    VkBuffer buffer;
    VmaAllocation allocation;
    bool requires_flush;
} rt_transfer_buffer;

typedef struct {
    rt_transfer_buffer tbuf;
    VkFence fence;
    VkSemaphore ownership_transfer;
} rt_transfer;

static rt_transfer *_transfers;
static uint32_t _transfer_count;
static rt_mutex *_transfer_lock;

static rt_transfer_buffer AcquireTransferBuffer(size_t size) {
    rt_transfer_buffer tbuf = {VK_NULL_HANDLE};

    VkBufferCreateInfo buffer_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO,
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
    };

    if (vmaCreateBuffer(g_gpu.allocator,
                        &buffer_info,
                        &alloc_info,
                        &tbuf.buffer,
                        &tbuf.allocation,
                        NULL) == VK_SUCCESS) {
        VkMemoryPropertyFlags props;
        vmaGetAllocationMemoryProperties(g_gpu.allocator, tbuf.allocation, &props);
        tbuf.requires_flush = (props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0;
    }
    return tbuf;
}

static void ReturnTransferBuffer(rt_transfer_buffer buffer) {
    vmaDestroyBuffer(g_gpu.allocator, buffer.buffer, buffer.allocation);
}

static void CopyToTransferBuffer(rt_transfer_buffer buffer, const void *data, size_t n) {
    void *tmem = NULL;
    vmaMapMemory(g_gpu.allocator, buffer.allocation, &tmem);
    RT_ASSERT(tmem, "Transfer Buffer memory must be mappable.");
    memcpy(tmem, data, n);
    vmaUnmapMemory(g_gpu.allocator, buffer.allocation);
    if (buffer.requires_flush)
        vmaFlushAllocation(g_gpu.allocator, buffer.allocation, 0, n);
}

rt_result InitializeTransfers(void) {
    _transfer_lock = rtCreateMutex();
    if (!_transfer_lock)
        return RT_UNKNOWN_ERROR;
    _transfers = calloc((size_t)rt_VkTransferSlotCount.i, sizeof(rt_transfer));
    if (!_transfers) {
        rtDestroyMutex(_transfer_lock);
        return RT_OUT_OF_MEMORY;
    }
    _transfer_count = 0;
    return RT_SUCCESS;
}

void ShutdownTransfers(void) {
    rtDestroyMutex(_transfer_lock);
    for (int i = 0; i < rt_VkTransferSlotCount.i; ++i) {
        if (_transfers[i].fence)
            vkDestroyFence(g_gpu.device, _transfers[i].fence, g_gpu.alloc_cb);
    }
    free(_transfers);
}

#define TRANSFER_FAILED -1
#define TRANSFER_NOT_NEEDED 0
#define TRANSFER_STARTED 1

static int AcquireBufferOwnership(rt_transfer *transfer,
                                  VkBuffer buffer,
                                  rt_gpu_queue current_owner,
                                  VkCommandBuffer transfer_cmd) {
    if (!transfer->ownership_transfer) {
        VkSemaphoreCreateInfo sem_info = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };
        if (vkCreateSemaphore(g_gpu.device,
                              &sem_info,
                              g_gpu.alloc_cb,
                              &transfer->ownership_transfer) != VK_SUCCESS) {
            rtReportError("vk", "Failed to create an ownership transfer semaphore.");
            return TRANSFER_FAILED;
        }
    }

    uint32_t src_family = rtGetQueueFamily(current_owner);
    uint32_t dst_family = rtGetQueueFamily(RT_TRANSFER_QUEUE);
    if (src_family == dst_family)
        return TRANSFER_NOT_NEEDED;

    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(current_owner);
    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    vkBeginCommandBuffer(cmd, &begin_info);
    VkBufferMemoryBarrier2 release_barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .buffer = buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
        .srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
        .srcAccessMask = 0,
        .srcQueueFamilyIndex = src_family,
        .dstQueueFamilyIndex = dst_family,
    };
    VkDependencyInfo dep = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                            .pBufferMemoryBarriers = &release_barrier,
                            .bufferMemoryBarrierCount = 1};
    vkCmdPipelineBarrier2(cmd, &dep);
    vkEndCommandBuffer(cmd);

    VkBufferMemoryBarrier2 acquire_barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .buffer = buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
        .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
        .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .srcQueueFamilyIndex = src_family,
        .dstQueueFamilyIndex = dst_family,
    };
    VkDependencyInfo dep2 = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                             .pBufferMemoryBarriers = &acquire_barrier,
                             .bufferMemoryBarrierCount = 1};
    vkCmdPipelineBarrier2(transfer_cmd, &dep2);

    /* Only transfer the ownership when the frame is finished */
    VkSemaphore wait_semaphore = VK_NULL_HANDLE;
    rt_frame_data *frame = rtGetFrameData(g_gpu.current_frame_id);
    wait_semaphore = frame->render_finished;

    uint32_t dummy = 0;
    if (rtSubmitSingleCommandBuffer(cmd,
                                    &wait_semaphore,
                                    &dummy,
                                    1,
                                    &transfer->ownership_transfer,
                                    &dummy,
                                    1,
                                    current_owner,
                                    VK_NULL_HANDLE) != RT_SUCCESS)
        return TRANSFER_FAILED;
    return TRANSFER_STARTED;
}

rt_result rtUploadToBuffer(VkBuffer buffer,
                           VmaAllocation allocation,
                           rt_gpu_queue current_owner,
                           const void *data,
                           size_t nbytes) {
    rtLockMutex(_transfer_lock);
    rt_transfer *transfer =
        (int)_transfer_count < rt_VkTransferSlotCount.i ? &_transfers[_transfer_count++] : NULL;
    rtUnlockMutex(_transfer_lock);
    if (!transfer)
        return RT_NO_TRANSFER_SLOTS;

    transfer->tbuf = AcquireTransferBuffer(nbytes);
    if (!transfer->tbuf.buffer) {
        return RT_OUT_OF_MEMORY;
    }

    CopyToTransferBuffer(transfer->tbuf, data, nbytes);

    if (!transfer->fence) {
        VkFenceCreateInfo fence_info = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        if (vkCreateFence(g_gpu.device, &fence_info, g_gpu.alloc_cb, &transfer->fence) !=
            VK_SUCCESS) {
            return RT_UNKNOWN_ERROR;
        }
    }

    VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
    VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
                                           .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
    vkBeginCommandBuffer(cmd, &begin_info);

    bool requires_ownership_transfer =
        (current_owner != RT_TRANSFER_QUEUE && current_owner != RT_VK_UNOWNED);
    if (requires_ownership_transfer) {
        int did_transfer = AcquireBufferOwnership(transfer, buffer, current_owner, cmd);
        if (did_transfer == TRANSFER_FAILED)
            return RT_UNKNOWN_ERROR;
        else if (did_transfer == TRANSFER_NOT_NEEDED)
            requires_ownership_transfer = false;
    }
    VkBufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = nbytes};
    vkCmdCopyBuffer(cmd, transfer->tbuf.buffer, buffer, 1, &region);
    vkEndCommandBuffer(cmd);

    uint32_t dummy = 0;
    return rtSubmitSingleCommandBuffer(cmd,
                                       requires_ownership_transfer ? &transfer->ownership_transfer
                                                                   : NULL,
                                       requires_ownership_transfer ? &dummy : NULL,
                                       requires_ownership_transfer ? 1 : 0,
                                       NULL,
                                       NULL,
                                       0,
                                       RT_TRANSFER_QUEUE,
                                       transfer->fence);
}
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void) {
    if (_transfer_count == 0)
        return;
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    RT_ASSERT(temp.arena, "Could not get a temporary arena for flushing gpu transfers.");
    rtLockMutex(_transfer_lock);
    VkFence *fences = RT_ARENA_PUSH_ARRAY(temp.arena, VkFence, _transfer_count);
    if (!fences) {
        rtReportError("vk", "Failed to allocate fences array for flushing gpu transfers.");
        rtUnlockMutex(_transfer_lock);
        rtReturnTemporaryArena(temp);
        return;
    }
    uint32_t count = 0;
    for (uint32_t i = 0; i < _transfer_count; ++i) {
        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
            continue;
        fences[count++] = _transfers[i].fence;
    }
    vkWaitForFences(g_gpu.device, count, fences, VK_TRUE, UINT64_MAX);
    for (uint32_t i = 0; i < _transfer_count; ++i) {
        if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
            continue;
        ReturnTransferBuffer(_transfers[i].tbuf);
    }
    _transfer_count = 0;
    rtUnlockMutex(_transfer_lock);
    rtReturnTemporaryArena(temp);
}
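
For context, the intended call pattern for the two public entry points looks roughly like this; the wrapper is hypothetical, and the rt_buffer fields and lock helpers come from resources.h above:

/* Upload CPU data into a device buffer, then block until the copy lands.
 * Sketch only: a real engine would batch uploads instead of flushing per call. */
static rt_result UploadNow(rt_buffer_handle handle, const void *data, size_t nbytes) {
    rt_buffer *buf = rtGetBuffer(handle);
    if (!buf)
        return RT_UNKNOWN_ERROR;
    rt_result res =
        rtUploadToBuffer(buf->buffer, buf->allocation, rtGetBufferOwner(handle), data, nbytes);
    if (res != RT_SUCCESS)
        return res; /* e.g. RT_NO_TRANSFER_SLOTS when every slot is taken */
    rtFlushGPUTransfers();
    rtSetBufferOwner(handle, RT_TRANSFER_QUEUE); /* queue ownership moved during the upload */
    return RT_SUCCESS;
}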
@ -1,16 +0,0 @@
#ifndef RT_VK_TRANSFERS_H
#define RT_VK_TRANSFERS_H

#include "gpu.h"
#include "runtime/runtime.h"

enum {
    RT_NO_TRANSFER_SLOTS = RT_CUSTOM_ERROR_START,
};

rt_result rtUploadToBuffer(VkBuffer buffer,
                           VmaAllocation allocation,
                           rt_gpu_queue current_owner,
                           const void *data,
                           size_t nbytes);

/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void);

#endif
@ -1,21 +0,0 @@
#ifdef _MSC_VER
#pragma warning(push, 0)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wmissing-braces"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#define VMA_IMPLEMENTATION
#include <vma/vk_mem_alloc.h>
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
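
With both VMA_STATIC_VULKAN_FUNCTIONS and VMA_DYNAMIC_VULKAN_FUNCTIONS set to 0 as above, VMA neither links Vulkan statically nor loads entry points itself, so the application must hand it a populated VmaVulkanFunctions table. A minimal sketch of filling that table from volk's global function pointers, assuming VMA 3.x member names (gpu.c, which held the real allocator setup, was also removed in this commit):

static VmaVulkanFunctions GetVmaFunctionsFromVolk(void) {
    /* volk's loaded function pointers share the Vulkan function names,
     * so the table members can be assigned directly. */
    VmaVulkanFunctions f = {0};
    f.vkGetInstanceProcAddr = vkGetInstanceProcAddr;
    f.vkGetDeviceProcAddr = vkGetDeviceProcAddr;
    f.vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties;
    f.vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties;
    f.vkAllocateMemory = vkAllocateMemory;
    f.vkFreeMemory = vkFreeMemory;
    f.vkMapMemory = vkMapMemory;
    f.vkUnmapMemory = vkUnmapMemory;
    f.vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges;
    f.vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges;
    f.vkBindBufferMemory = vkBindBufferMemory;
    f.vkBindImageMemory = vkBindImageMemory;
    f.vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements;
    f.vkGetImageMemoryRequirements = vkGetImageMemoryRequirements;
    f.vkCreateBuffer = vkCreateBuffer;
    f.vkDestroyBuffer = vkDestroyBuffer;
    f.vkCreateImage = vkCreateImage;
    f.vkDestroyImage = vkDestroyImage;
    f.vkCmdCopyBuffer = vkCmdCopyBuffer;
    return f;
}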