Rip out renderer code

THIS WILL NOT COMPILE
This commit is contained in:
Kevin Trogant 2024-06-04 11:45:55 +02:00
parent 6b830f3ff2
commit b0e6839a1c
56 changed files with 0 additions and 8227 deletions

View File

@ -1,10 +0,0 @@
#define RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
#include "builtin_objects.h"
rt_builtin_render_object_types g_builtin_render_object_types;
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void) {
g_builtin_render_object_types.render_mesh =
rtRegisterRenderObjectType(sizeof(rt_render_mesh), "render_mesh");
return RT_SUCCESS;
}

View File

@ -1,40 +0,0 @@
#ifndef RT_GFX_BUILTIN_OBJECTS_H
#define RT_GFX_BUILTIN_OBJECTS_H
/* Render Object types used by the builtin graphics passes.
*
* As a user you are free not to use these, but then you
* also cannot use the builtin render passes. */
#include "renderer_api.h"
#include "render_list.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
rt_pipeline_handle pipeline;
rt_buffer_handle vbo;
rt_buffer_handle ibo;
uint32_t vertex_count;
uint32_t index_count;
} rt_render_mesh;
typedef struct {
rt_render_object_type render_mesh;
} rt_builtin_render_object_types;
#ifndef RT_DONT_DEFINE_BULTIN_OBJECTS_GLOBAL
extern RT_DLLIMPORT rt_builtin_render_object_types g_builtin_render_object_types;
#endif
RT_DLLEXPORT rt_result rtRegisterBuiltinRenderObjectTypes(void);
#ifdef __cplusplus
}
#endif
#endif
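/* Illustrative sketch (not part of the original file): how a caller might combine the
 * builtin render_mesh type with the render list API from render_list.h. The frame_id
 * and the mesh contents are placeholders. */
#if 0
rt_result ExampleSubmitBuiltinMesh(unsigned int frame_id) {
    rt_result res = rtRegisterBuiltinRenderObjectTypes();
    if (res != RT_SUCCESS)
        return res;
    rt_create_render_list_result list_res =
        rtCreateRenderList(g_builtin_render_object_types.render_mesh, frame_id);
    if (!list_res.ok)
        return RT_OUT_OF_MEMORY;
    rt_render_mesh mesh = {0}; /* pipeline, vbo, ibo would be filled in by the caller */
    if (!rtPushRenderListEntry(&list_res.list, &mesh))
        return RT_UNKNOWN_ERROR;
    return RT_SUCCESS;
}
#endif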

View File

@ -1,260 +0,0 @@
#include "effect.h"
#include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/handles.h"
#include "runtime/hashing.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include "runtime/atomics.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_SZ(rt_EffectCacheSize, "The number of slots in the effect cache. Default: 1024", 1024);
typedef struct {
rt_resource_id resource;
rt_effect effect;
_Alignas(4) unsigned int refcount;
} rt_effect_cache_slot;
/* We use a hashtable to find previously loaded effects.
* To reclaim unreferenced slots when we need to, we use a minheap.
* The minheap implements an LRU list. To track usage, we use a global running "usage counter",
* incremented whenever an effect is loaded.
*/
typedef struct {
rt_effect_cache_slot *slots;
rt_hashtable lut;
rt_minheap reclaim_heap;
/* Linearly allocate slots until we reach capacity */
size_t next_free;
/* Used to track "time" since an effect was loaded */
_Alignas(4) int usage_counter;
void *memory;
rt_rwlock lock;
} rt_effect_cache;
static rt_effect_cache _cache;
rt_result InitEffectCache(void) {
if (!RT_IS_POWER_OF_TWO(rt_EffectCacheSize.sz)) {
rtReportError(
"GFX",
"The value of \"rt_EffectCacheSize\" must be a power of two.\nConfigured: %zu.",
rt_EffectCacheSize.sz);
return RT_INVALID_VALUE;
}
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_cache.lock = lock_res.lock;
size_t mem_required = sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz +
RT_HASH_TABLE_MEMORY_REQUIRED(
2 * rt_EffectCacheSize.sz) + /* double to keep performance up */
sizeof(int) * rt_EffectCacheSize.sz + /* heap keys */
sizeof(size_t) * rt_EffectCacheSize.sz; /* heap values */
_cache.memory = malloc(mem_required);
if (!_cache.memory) {
rtDestroyRWLock(&_cache.lock);
return RT_OUT_OF_MEMORY;
}
_cache.lut = rtCreateHashtable(rt_EffectCacheSize.sz, _cache.memory, NULL, NULL);
int *keys =
(int *)((char *)_cache.memory + RT_HASH_TABLE_MEMORY_REQUIRED(2 * rt_EffectCacheSize.sz));
size_t *values = (size_t *)(keys + rt_EffectCacheSize.sz);
_cache.reclaim_heap = rtCreateMinheap(keys, values, sizeof(size_t), rt_EffectCacheSize.sz, 0);
_cache.usage_counter = 0;
_cache.slots = (rt_effect_cache_slot *)(values + rt_EffectCacheSize.sz);
memset(_cache.slots, 0, sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz);
return RT_SUCCESS;
}
void ShutdownEffectCache(void) {
free(_cache.memory);
}
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
uint32_t id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
uint32_t id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}
static void ReleaseEffect(rt_effect *effect) {
for (unsigned int i = 0; i < effect->pass_count; ++i) {
g_renderer.DestroyPipeline(effect->passes[i].pipeline);
}
}
/* Returns the index of the reserved slot */
static size_t ReserveSlot(rt_resource_id id) {
if (_cache.next_free < rt_EffectCacheSize.sz) {
size_t slot = _cache.next_free++;
RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
_cache.slots[slot].refcount = 1;
if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
_cache.slots[slot].refcount = 0;
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
return SIZE_MAX;
}
_cache.slots[slot].resource = id;
return slot;
} else if (!rtMinheapIsEmpty(&_cache.reclaim_heap)) {
size_t slot;
rtMinheapPop(&_cache.reclaim_heap, &slot);
RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
_cache.slots[slot].refcount = 1;
rt_resource_id old_id = _cache.slots[slot].resource;
RT_ASSERT(old_id != RT_INVALID_RESOURCE_ID, "The slot should contain an old effect.");
ReleaseEffect(&_cache.slots[slot].effect);
rtHashtableRemove(&_cache.lut, old_id);
if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
_cache.slots[slot].refcount = 0;
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
return SIZE_MAX;
}
_cache.slots[slot].resource = id;
return slot;
} else {
rtLog("GFX",
"Could not insert effect %x into the cache, because the effect cache is full.",
id);
return SIZE_MAX;
}
}
/* Load resource to memory allocated on the given arena */
static rt_result LoadResource(rt_resource_id id, void **p_out, rt_arena *arena) {
size_t size = rtGetResourceSize(id);
if (!size) {
rtLog("GFX", "ID %x is not a valid resource.", id);
return RT_INVALID_VALUE;
}
void *dst = rtArenaPush(arena, size);
if (!dst) {
rtLog("GFX", "Failed to allocate %zu bytes of temporary storage.", size);
return RT_OUT_OF_MEMORY;
}
*p_out = dst;
return rtGetResource(id, dst);
}
static rt_result LoadEffect(rt_resource_id id, rt_effect *effect) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena) {
rtLog("GFX", "Could not get a temporary arena.");
return RT_OUT_OF_MEMORY;
}
const rt_resource *resource = NULL;
rt_result res = LoadResource(id, (void **)&resource, temp.arena);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
if (resource->type != RT_RESOURCE_EFFECT) {
rtReturnTemporaryArena(temp);
rtLog("GFX", "Resource %x does not refer to an effect resource.", id);
return RT_INVALID_VALUE;
}
const rt_effect_info *effect_info = resource->data;
effect->pass_count = effect_info->pass_count;
for (unsigned int i = 0; i < effect_info->pass_count; ++i) {
rt_resource *pipeline_resource = NULL;
res = LoadResource(effect_info->passes[i].pipeline, (void **)&pipeline_resource, temp.arena);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
if (pipeline_resource->type != RT_RESOURCE_PIPELINE) {
rtReturnTemporaryArena(temp);
rtLog("GFX", "Resource %x does not refer to a pipeline resource.", id);
return RT_INVALID_VALUE;
}
rt_pipeline_info *pipeline_info = pipeline_resource->data;
rt_pipeline_handle pipeline = g_renderer.CompilePipeline(pipeline_info);
if (!RT_IS_HANDLE_VALID(pipeline)) {
rtReturnTemporaryArena(temp);
rtLog("GFX",
"Failed to compile the pipeline of pass %d (%x).",
i,
effect_info->passes[i].pass_id);
return RT_UNKNOWN_ERROR;
}
effect->passes[i].pass_id = effect_info->passes[i].pass_id;
effect->passes[i].pipeline = pipeline;
}
rtReturnTemporaryArena(temp);
return RT_SUCCESS;
}
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect) {
rtAtomic32Inc(&_cache.usage_counter);
/* Check if the effect is already loaded */
rtLockRead(&_cache.lock);
uint64_t slot = rtHashtableLookup(&_cache.lut, id, UINT64_MAX);
if (slot != UINT64_MAX) {
RT_ASSERT(_cache.slots[slot].resource == id, "Got the wrong effect");
rtAtomic32Inc(&_cache.slots[slot].refcount);
*effect = &_cache.slots[slot].effect;
rtUnlockRead(&_cache.lock);
return RT_SUCCESS;
}
rtUnlockRead(&_cache.lock);
/* Load the effect */
rtLockWrite(&_cache.lock);
if (rtHashtableLookup(&_cache.lut, id, UINT64_MAX) != UINT64_MAX) {
/* Another thread was faster than us; just retry */
rtUnlockWrite(&_cache.lock);
return rtLoadEffect(id, effect);
}
slot = ReserveSlot(id);
if (slot == SIZE_MAX) {
rtUnlockWrite(&_cache.lock);
return RT_OUT_OF_MEMORY;
}
rt_result res = LoadEffect(id, &_cache.slots[slot].effect);
rtUnlockWrite(&_cache.lock);
*effect = &_cache.slots[slot].effect;
return res;
}
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect) {
RT_VERIFY(effect);
rt_effect_cache_slot *slot = (rt_effect_cache_slot *)((char *)effect - offsetof(rt_effect_cache_slot, effect));
if (rtAtomic32Dec(&slot->refcount) == 0) {
rtLockWrite(&_cache.lock);
size_t slot_index = (size_t)(slot - _cache.slots);
rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot_index);
rtUnlockWrite(&_cache.lock);
}
}
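/* Illustrative sketch (not part of the original file): the intended call pattern for the
 * effect cache above. The resource id comes from the caller. */
#if 0
static rt_result ExampleUseEffect(rt_resource_id effect_resource) {
    const rt_effect *effect = NULL;
    rt_result res = rtLoadEffect(effect_resource, &effect);
    if (res != RT_SUCCESS)
        return res;
    for (uint32_t i = 0; i < effect->pass_count; ++i) {
        /* Bind effect->passes[i].pipeline for the render pass identified by
         * effect->passes[i].pass_id, e.g. via g_renderer.CmdBindPipeline(). */
    }
    rtReleaseEffect(effect); /* drops the refcount; the slot becomes reclaimable at 0 */
    return RT_SUCCESS;
}
#endif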

View File

@ -1,60 +0,0 @@
#ifndef RT_GFX_EFFECT_H
#define RT_GFX_EFFECT_H
/* An effect lists the passes during which an object needs to be rendered
* and a pipeline for each pass.
* The effect also defines the required vertex layout per pass.
*/
#include "gfx.h"
#include "renderer_api.h"
#include "runtime/resources.h"
/* *** Resource types *** */
typedef struct rt_pipeline_info_s {
rt_resource_id vertex_shader;
rt_resource_id fragment_shader;
rt_resource_id compute_shader;
/* TODO(Kevin): Fixed function settings */
} rt_pipeline_info;
typedef struct {
/* Id of the render pass during which this effect pass is run. */
uint32_t pass_id;
rt_resource_id pipeline;
} rt_effect_pass_info;
typedef struct {
uint32_t pass_count;
rt_effect_pass_info passes[RT_MAX_SUBRESOURCES];
} rt_effect_info;
/* *** Runtime types *** */
typedef struct {
uint32_t pass_id;
rt_pipeline_handle pipeline;
} rt_effect_pass;
typedef struct {
uint32_t pass_count;
rt_effect_pass passes[RT_MAX_SUBRESOURCES];
} rt_effect;
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len);
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len);
/* Load an effect from a resource file.
* Returns:
* - RT_SUCCESS
* - RT_OUT_OF_MEMORY, if temporary memory allocations failed
* - RT_INVALID_VALUE, if id does not refer to an effect resource.
* - RT_UNKNOWN_ERROR, if a pipeline failed to compile
* - errors returned by rtGetResource() */
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect);
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect);
#endif
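/* Illustrative sketch (not part of the original file): pass ids are 32-bit name hashes,
 * so a pass declared in an effect can be matched against a framegraph pass by name.
 * "forward" is a placeholder pass name. */
#if 0
uint32_t forward_pass_id = rtCalculateRenderPassID("forward", sizeof("forward") - 1);
/* forward_pass_id is never 0 and can be compared against rt_effect_pass_info.pass_id. */
#endif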

View File

@ -1,88 +0,0 @@
#ifndef RT_GFX_H
#define RT_GFX_H
/* Graphics system. This is the interface of the rendering code.
 *
 * We need (at least) three different renderers:
* - world cell renderer (for world & dungeon environments)
* - character renderer (for animated models)
* - object renderer (for static models)
*/
#include <stdint.h>
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4201) /* anonymous struct */
#endif
typedef union {
float v[4];
struct {
float r;
float g;
float b;
float a;
};
} rt_color;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
/* NOTE(kevin): When you add a value here, you need to handle them in
* framegraph_processor.c : ParseFramegraph
* and in the render target and texture functions of all renderers. */
typedef enum {
RT_PIXEL_FORMAT_INVALID,
RT_PIXEL_FORMAT_R8G8B8A8_UNORM,
RT_PIXEL_FORMAT_B8G8R8A8_UNORM,
RT_PIXEL_FORMAT_R8G8B8A8_SRGB,
RT_PIXEL_FORMAT_B8G8R8A8_SRGB,
RT_PIXEL_FORMAT_R8G8B8_UNORM,
RT_PIXEL_FORMAT_B8G8R8_UNORM,
RT_PIXEL_FORMAT_R8G8B8_SRGB,
RT_PIXEL_FORMAT_B8G8R8_SRGB,
RT_PIXEL_FORMAT_DEPTH24_STENCIL8,
RT_PIXEL_FORMAT_DEPTH32,
/* Special value indicating whichever format the swapchain uses */
RT_PIXEL_FORMAT_SWAPCHAIN,
RT_PIXEL_FORMAT_count,
} rt_pixel_format;
RT_INLINE int rtIsDepthFormat(rt_pixel_format format) {
return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 || format == RT_PIXEL_FORMAT_DEPTH32;
}
/* Defined in renderer_api.h; the full definition is not needed for most gfx usage. */
typedef struct rt_renderer_init_info_s rt_renderer_init_info;
RT_DLLEXPORT void rtRegisterRendererCVars(void);
RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info);
RT_DLLEXPORT void rtShutdownGFX(void);
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id);
#ifdef __cplusplus
}
#endif
#endif
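/* Illustrative sketch (not part of the original file): the expected lifecycle of the
 * graphics system as exposed by this header. The init info and the loop condition are
 * placeholders. */
#if 0
void ExampleGfxLifecycle(rt_renderer_init_info *init_info) {
    rtRegisterRendererCVars(); /* before the config system loads cvar values */
    if (rtInitGFX(init_info) != RT_SUCCESS)
        return;
    int running = 1; /* placeholder main-loop condition */
    for (unsigned int frame_id = 1; running; ++frame_id) {
        rtBeginGFXFrame(frame_id);
        /* ... build render views and submit them to the render graph ... */
        rtEndGFXFrame(frame_id);
    }
    rtShutdownGFX();
}
#endif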

View File

@ -1,224 +0,0 @@
#include <stdbool.h>
#include <string.h>
#define RT_DONT_DEFINE_RENDERER_GLOBAL
#include "gfx.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/dynamic_libs.h"
/* Attributes are used to bind buffers (or textures) to symbolic values.
* For example, an attribute might be bound to "CELL_GRID", which would be
* replaced with the (at the time of the invoke) grid buffer of the current
* world cell.
*/
rt_renderer_api g_renderer;
#ifndef RT_STATIC_LIB
static rt_dynlib _renderer_lib;
#endif
static bool _renderer_loaded = false;
RT_DLLEXPORT
RT_CVAR_S(rt_Renderer,
"Select the render backend. Available options: [vk, dx11, null], Default: vk",
"dx11");
extern rt_cvar rt_RenderViewArenaSize;
extern rt_cvar rt_RenderListPoolSize;
#ifdef RT_STATIC_LIB
extern void RT_RENDERER_API_FN(RegisterCVars)(void);
extern rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *);
extern void RT_RENDERER_API_FN(Shutdown)(void);
extern unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void);
extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
extern void RT_RENDERER_API_FN(EndFrame)(unsigned int);
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
const rt_alloc_command_buffer_info *,
rt_command_buffer_handle *);
extern rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue,
const rt_submit_command_buffers_info *);
extern rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t,
const rt_gpu_semaphore_info *,
rt_gpu_semaphore_handle *);
extern void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *);
extern uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void);
extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void);
extern rt_result
RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *, unsigned int);
extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id);
extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph, unsigned int frame_id);
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
const rt_cmd_begin_pass_info *);
extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle,
rt_render_target_handle,
rt_render_target_state);
extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle,
rt_render_target_handle);
extern void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle, rt_pipeline_handle);
extern void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle,
uint32_t,
uint32_t,
const rt_buffer_handle *,
const uint32_t *,
const uint32_t *);
extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint32_t);
#endif
extern rt_result InitRenderLists(void);
extern void ShutdownRenderLists(void);
extern void ResetRenderLists(unsigned int frame_id);
extern rt_result InitRenderViews(void);
extern void ShutdownRenderViews(void);
extern void ResetRenderViews(unsigned int frame_id);
extern rt_result InitEffectCache(void);
extern void ShutdownEffectCache(void);
static bool LoadRenderer(void) {
#if !defined(RT_STATIC_LIB)
#define RETRIEVE_SYMBOL(name, type) \
g_renderer.name = (type *)rtGetSymbol(_renderer_lib, "rtRen" #name); \
if (!g_renderer.name) { \
rtReportError("GFX", \
"Unable to retrieve renderer function %s from backend %s", \
#name, \
rt_Renderer.s); \
}
if (strcmp(rt_Renderer.s, "vk") == 0) {
_renderer_lib = rtOpenLib(RT_DLLNAME("rtvk"));
if (!_renderer_lib) {
rtReportError("GFX", "Unable to load renderer backend: %s", RT_DLLNAME("rtvk"));
return false;
}
RETRIEVE_SYMBOL(RegisterCVars, rt_register_renderer_cvars_fn);
RETRIEVE_SYMBOL(Init, rt_init_renderer_fn);
RETRIEVE_SYMBOL(Shutdown, rt_shutdown_renderer_fn);
RETRIEVE_SYMBOL(GetMaxFramesInFlight, rt_get_max_frames_in_flight_fn);
RETRIEVE_SYMBOL(BeginFrame, rt_begin_frame_fn);
RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn);
RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn);
RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn);
RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
RETRIEVE_SYMBOL(CreateRenderGraphBuilder, rt_create_render_graph_builder_fn);
RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
RETRIEVE_SYMBOL(ExecuteRenderGraph, rt_execute_render_graph_fn);
RETRIEVE_SYMBOL(SubmitRenderView, rt_submit_render_view_fn);
RETRIEVE_SYMBOL(ResetRenderGraph, rt_reset_render_graph_fn);
RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn);
RETRIEVE_SYMBOL(CmdBindPipeline, rt_cmd_bind_pipeline_fn);
RETRIEVE_SYMBOL(CmdBindVertexBuffers, rt_cmd_bind_vertex_buffers_fn);
RETRIEVE_SYMBOL(CmdDraw, rt_cmd_draw_fn);
} else {
rtReportError("GFX",
"Unsupported renderer backend: (%s) %s",
rt_Renderer.name,
rt_Renderer.s);
return false;
}
#undef RETRIEVE_SYMBOL
#else
g_renderer.RegisterCVars = &rtRenRegisterCVars;
g_renderer.Init = &rtRenInit;
g_renderer.Shutdown = &rtRenShutdown;
g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight;
g_renderer.BeginFrame = &rtRenBeginFrame;
g_renderer.EndFrame = &rtRenEndFrame;
g_renderer.CompilePipeline = &rtRenCompilePipeline;
g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers;
g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers;
g_renderer.CreateBuffers = &rtRenCreateBuffers;
g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
g_renderer.ExecuteRenderGraph = &rtRenExecuteRenderGraph;
g_renderer.SubmitRenderView = &rtRenSubmitRenderView;
g_renderer.ResetRenderGraph = &rtRenResetRenderGraph;
g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
g_renderer.CmdEndPass = &rtRenCmdEndPass;
g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite;
g_renderer.CmdBindPipeline = &rtRenCmdBindPipeline;
g_renderer.CmdBindVertexBuffers = &rtRenCmdBindVertexBuffers;
g_renderer.CmdDraw = &rtRenCmdDraw;
#endif
return true;
}
RT_DLLEXPORT void rtRegisterRendererCVars(void) {
if (!_renderer_loaded) {
if (!LoadRenderer())
return;
_renderer_loaded = true;
}
g_renderer.RegisterCVars();
}
RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
rtRegisterCVAR(&rt_Renderer);
rtRegisterCVAR(&rt_RenderViewArenaSize);
rtRegisterCVAR(&rt_RenderListPoolSize);
if (!_renderer_loaded) {
if (!LoadRenderer())
return RT_UNKNOWN_ERROR;
g_renderer.RegisterCVars();
_renderer_loaded = true;
}
rt_result result;
if ((result = g_renderer.Init(renderer_info)) != RT_SUCCESS)
return result;
if ((result = InitRenderLists()) != RT_SUCCESS)
return result;
if ((result = InitRenderViews()) != RT_SUCCESS)
return result;
if ((result = InitEffectCache()) != RT_SUCCESS)
return result;
return result;
}
RT_DLLEXPORT void rtShutdownGFX(void) {
ShutdownEffectCache();
ShutdownRenderViews();
ShutdownRenderLists();
g_renderer.Shutdown();
}
RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
g_renderer.BeginFrame(frame_id);
}
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) {
g_renderer.EndFrame(frame_id);
ResetRenderLists(frame_id);
ResetRenderViews(frame_id);
}

View File

@ -1,25 +0,0 @@
gfx_deps = [thread_dep, m_dep]
gfx_lib = library('rtgfx',
# Project Sources
'builtin_objects.h',
'effect.h',
'gfx.h',
'renderer_api.h',
'render_list.h',
'render_view.h',
'builtin_objects.c',
'effect.c',
'gfx_main.c',
'render_list.c',
'render_view.c',
# Contrib Sources
dependencies : gfx_deps,
include_directories : engine_incdir,
link_with : runtime_lib,
c_pch : 'pch/gfx_pch.h',
install : true)
engine_libs += gfx_lib
engine_lib_paths += gfx_lib.full_path()

View File

@ -1,11 +0,0 @@
/* Stdlib */
#include <stdint.h>
/* Project */
#include "gfx.h"
/* Commonly used runtime headers */
#include "runtime/runtime.h"
#include "runtime/threading.h"
#include "runtime/mem_arena.h"
#include "runtime/config.h"

View File

@ -1,207 +0,0 @@
#include "render_list.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <string.h>
RT_CVAR_I(rt_RenderListPoolSize,
"Size of the pool allocated for render lists in bytes. Default: 8 MiB",
RT_MB(8));
typedef struct {
size_t size;
const char *name;
} rt_render_object_type_data;
typedef struct rt_list_pool_s {
size_t capacity;
struct rt_list_pool_s *next;
} rt_list_pool;
typedef struct {
rt_mutex *lock;
rt_list_pool *first_free;
rt_arena arena;
unsigned int access_frame_id;
} rt_frame_lists;
#define DEFAULT_LIST_CAPACITY RT_KB(1)
static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1];
static unsigned int _type_count = 0;
static rt_rwlock _type_lock;
static rt_frame_lists _frame_lists[4];
static unsigned int _max_frames_in_flight;
rt_result InitRenderLists(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_type_lock = lock_res.lock;
_max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frame_lists),
"Invalid maxium number of in-flight frames.");
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
if (!arena_res.ok) {
rtDestroyRWLock(&_type_lock);
return RT_OUT_OF_MEMORY;
}
_frame_lists[i].arena = arena_res.arena;
_frame_lists[i].lock = rtCreateMutex();
if (!_frame_lists[i].lock) {
rtReleaseArena(&_frame_lists[i].arena);
rtDestroyRWLock(&_type_lock);
return RT_UNKNOWN_ERROR;
}
_frame_lists[i].first_free = NULL;
_frame_lists[i].access_frame_id = 0;
}
return RT_SUCCESS;
}
void ShutdownRenderLists(void) {
rtDestroyRWLock(&_type_lock);
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtDestroyMutex(_frame_lists[i].lock);
rtReleaseArena(&_frame_lists[i].arena);
}
}
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
const char *debug_name) {
if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) {
rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE);
return RT_INVALID_RENDER_OBJECT_TYPE;
}
rtLockWrite(&_type_lock);
rt_render_object_type type = (rt_render_object_type)++_type_count;
_types[_type_count].size = object_size;
_types[_type_count].name = debug_name;
if (debug_name)
rtLog("GFX",
"Registered render object type %s; object size: %zu. Type: %u",
debug_name,
object_size,
_type_count);
else
rtLog("GFX",
"Registered unnamed render object type; object size: %zu. Type: %u",
object_size,
_type_count);
rtUnlockWrite(&_type_lock);
return type;
}
RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) {
size_t size = 0;
rtLockRead(&_type_lock);
if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
size = _types[type].size;
rtUnlockRead(&_type_lock);
return size;
}
RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type) {
const char *name = NULL;
rtLockRead(&_type_lock);
if (type > RT_INVALID_RENDER_OBJECT_TYPE && type <= _type_count)
name = _types[type].name;
rtUnlockRead(&_type_lock);
return name;
}
static rt_create_render_list_result
CreateNewList(rt_render_object_type type, unsigned int frame_id, size_t capacity) {
rt_create_render_list_result res = {.ok = false};
unsigned int slot = frame_id % _max_frames_in_flight;
rtLockMutex(_frame_lists[slot].lock);
_frame_lists[slot].access_frame_id = frame_id;
if (!_frame_lists[slot].first_free ||
_frame_lists[slot].first_free->capacity < capacity) { /* Allocate a new list */
rt_list_pool *pool = rtArenaPush(&_frame_lists[slot].arena,
sizeof(rt_list_pool) + sizeof(unsigned int) + capacity);
if (!pool) {
rtReportError("GFX",
"Out of render list pool space! Configured space: %d kiB",
rt_RenderListPoolSize.i / 1024);
goto out;
}
pool->capacity = capacity;
pool->next = _frame_lists[slot].first_free;
_frame_lists[slot].first_free = pool;
}
rt_render_list list;
unsigned int *frame_id_store =
(unsigned int *)((char *)_frame_lists[slot].first_free + sizeof(rt_list_pool));
*frame_id_store = frame_id;
list.data = (char *)_frame_lists[slot].first_free + sizeof(rt_list_pool) + sizeof(unsigned int);
list.type = type;
list.length = 0;
res.ok = true;
res.list = list;
_frame_lists[slot].first_free = _frame_lists[slot].first_free->next;
out:
rtUnlockMutex(_frame_lists[slot].lock);
return res;
}
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type,
unsigned int frame_id) {
return CreateNewList(type, frame_id, DEFAULT_LIST_CAPACITY);
}
void ResetRenderLists(unsigned int frame_id) {
unsigned int slot = frame_id % _max_frames_in_flight;
RT_ASSERT(_frame_lists[slot].access_frame_id == frame_id ||
_frame_lists[slot].access_frame_id == 0,
"Frame id mismatch");
rtLockMutex(_frame_lists[slot].lock);
_frame_lists[slot].first_free = NULL;
_frame_lists[slot].access_frame_id = 0;
rtArenaClear(&_frame_lists[slot].arena);
rtUnlockMutex(_frame_lists[slot].lock);
}
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) {
size_t object_size = rtGetRenderObjectSize(list->type);
rt_list_pool *pool =
(rt_list_pool *)((char *)list->data - sizeof(rt_list_pool) - sizeof(unsigned int));
unsigned int frame_id = *(unsigned int *)((char *)list->data - sizeof(unsigned int));
size_t list_capacity = pool->capacity / object_size;
if (list->length == list_capacity) {
/* "Grow" the list */
rt_create_render_list_result list_res =
CreateNewList(list->type, frame_id, pool->capacity * 2);
if (!list_res.ok)
return false;
memcpy(list_res.list.data, list->data, list->length * object_size);
unsigned int slot = frame_id % _max_frames_in_flight;
rtLockMutex(_frame_lists[slot].lock);
pool->next = _frame_lists[slot].first_free;
_frame_lists[slot].first_free = pool;
rtUnlockMutex(_frame_lists[slot].lock);
list_res.list.length = list->length;
*list = list_res.list;
}
char *dst = (char *)list->data + list->length * object_size;
memcpy(dst, object, object_size);
++list->length;
return true;
}

View File

@ -1,70 +0,0 @@
#ifndef RT_RENDER_LIST_H
#define RT_RENDER_LIST_H
/* a render list collects render objects. */
#include <stdint.h>
#include <stdbool.h>
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Identifies a type of render objects. */
typedef uint32_t rt_render_object_type;
typedef uint32_t rt_render_object_type_mask;
enum {
RT_INVALID_RENDER_OBJECT_TYPE = 0,
RT_MAX_RENDER_OBJECT_TYPE = 32,
};
#define RT_RENDER_OBJECT_TYPE_BIT(type) (1u << ((type)-1))
/* Registers a new render object type.
* debug_name is optional and may be NULL.
*/
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
const char *debug_name);
RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type);
RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type type);
typedef struct {
rt_render_object_type type;
size_t length;
void *data;
} rt_render_list;
/* Returns a pointer to the i-th render list element.
* Works for every valid type, because the size is determined at runtime. */
RT_INLINE void *rtGetRenderListElement(const rt_render_list *list, size_t index) {
size_t size = rtGetRenderObjectSize(list->type);
return (char *)list->data + size * index;
}
/* Returns the i-th render list element, cast to type T.
* Saves a rtGetRenderObjectSize call, if the type is known beforehand. */
#define RT_GET_RENDER_LIST_ELEMENT(list, T, index) *(((T *)(list).data) + (index))
typedef struct {
bool ok;
rt_render_list list;
} rt_create_render_list_result;
/* Create a render list for a particular object type.
*
* Render Lists have a lifetime of one frame. */
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type, unsigned int frame_id);
/* Append a render object to a list. The object must be of the correct type. */
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object);
#ifdef __cplusplus
}
#endif
#endif
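/* Illustrative sketch (not part of the original file): registering a custom object type
 * once at startup and filling a per-frame list. my_debug_line is a hypothetical type. */
#if 0
typedef struct {
    float from[3];
    float to[3];
} my_debug_line;

void ExampleFillList(rt_render_object_type line_type, unsigned int frame_id) {
    /* line_type was obtained once via
     * rtRegisterRenderObjectType(sizeof(my_debug_line), "debug_line"); */
    rt_create_render_list_result res = rtCreateRenderList(line_type, frame_id);
    if (!res.ok)
        return;
    my_debug_line line = {{0.f, 0.f, 0.f}, {1.f, 1.f, 1.f}};
    rtPushRenderListEntry(&res.list, &line);
    /* Typed access without an extra rtGetRenderObjectSize() call: */
    my_debug_line first = RT_GET_RENDER_LIST_ELEMENT(res.list, my_debug_line, 0);
    (void)first;
}
#endif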

View File

@ -1,122 +0,0 @@
#include "render_view.h"
#include "renderer_api.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
RT_CVAR_I(rt_RenderViewArenaSize,
"Size of the memory arena used for allocating render views. Default: 1 MB",
RT_MB(1));
typedef struct {
rt_arena arena;
rt_mutex *lock;
uint32_t frame_id;
} rt_frame_views;
static rt_frame_views _frames[4];
static unsigned int _max_frames_in_flight;
rt_result InitRenderViews(void) {
_max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frames),
"Invalid maximum number of in-flight frames.");
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i);
if (!arena_res.ok)
return RT_OUT_OF_MEMORY;
_frames[i].arena = arena_res.arena;
_frames[i].lock = rtCreateMutex();
if (!_frames[i].lock) {
rtReleaseArena(&_frames[i].arena);
return RT_UNKNOWN_ERROR;
}
_frames[i].frame_id = 0;
}
return RT_SUCCESS;
}
void ShutdownRenderViews(void) {
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtDestroyMutex(_frames[i].lock);
rtReleaseArena(&_frames[i].arena);
}
}
void ResetRenderViews(unsigned int frame_id) {
unsigned int slot = frame_id % _max_frames_in_flight;
rtArenaClear(&_frames[slot].arena);
}
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count,
unsigned int frame_id) {
#ifdef RT_DEBUG
for (uint32_t i = 0; i < type_count - 1; ++i) {
for (uint32_t j = i + 1; j < type_count; ++j) {
RT_ASSERT(types[i] != types[j], "Duplicate render list type detected.");
}
}
#endif
unsigned int slot = frame_id % _max_frames_in_flight;
size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_object_type));
rtLockMutex(_frames[slot].lock);
void *storage = rtArenaPush(&_frames[slot].arena, size);
_frames[slot].frame_id = frame_id;
rtUnlockMutex(_frames[slot].lock);
if (!storage) {
return (rt_create_render_view_result){
.ok = false,
};
}
rt_render_view view;
view.lists = storage;
view.list_types = (rt_render_object_type *)(view.lists + type_count);
view.list_count = type_count;
view.type_mask = 0;
for (uint32_t i = 0; i < type_count; ++i) {
rt_create_render_list_result list_res = rtCreateRenderList(types[i], frame_id);
if (!list_res.ok) {
return (rt_create_render_view_result){
.ok = false,
};
}
view.lists[i] = list_res.list;
view.list_types[i] = types[i];
view.type_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
}
return (rt_create_render_view_result){.ok = true, .view = view};
}
RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object) {
for (uint32_t i = 0; i < view->list_count; ++i) {
if (view->list_types[i] == type)
return rtPushRenderListEntry(&view->lists[i], object);
}
return false;
}
RT_DLLEXPORT void
rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id, unsigned int frame_id) {
g_renderer.SubmitRenderView(render_graph, pass_id, view, frame_id);
}
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count) {
if (view.list_count != type_count)
return 0;
for (uint32_t i = 0; i < type_count; ++i) {
if (view.list_types[i] != types[i])
return 0;
}
return 1;
}

View File

@ -1,63 +0,0 @@
#ifndef RT_GFX_RENDER_VIEW_H
#define RT_GFX_RENDER_VIEW_H
/* A render view acts as a container of one or more render lists.
* Each view is processed by exactly one pass. */
#include "render_list.h"
typedef struct rt_render_graph_s rt_render_graph;
typedef struct {
rt_render_list *lists;
rt_render_object_type *list_types;
uint32_t list_count;
rt_render_object_type_mask type_mask;
} rt_render_view;
typedef struct {
bool ok;
rt_render_view view;
} rt_create_render_view_result;
#ifdef __cplusplus
extern "C" {
#endif
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count,
unsigned int frame_id);
RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object);
RT_DLLEXPORT void rtSubmitRenderView(rt_render_view view,
rt_render_graph *render_graph,
uint32_t pass_id,
unsigned int frame_id);
/* Checks if the view contains exactly the given types in the given order */
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count);
/* Checks if the view contains exactly the given types, in any order */
RT_INLINE static int
rtDoViewTypesMatch(rt_render_view view, const rt_render_object_type *types, uint32_t type_count) {
rt_render_object_type_mask in_mask = 0;
for (uint32_t i = 0; i < type_count; ++i) {
in_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
}
return view.type_mask == in_mask;
}
RT_INLINE static int rtDoesViewContainTypes(rt_render_view view,
rt_render_object_type_mask type_mask) {
return (int)(view.type_mask & type_mask);
}
#ifdef __cplusplus
}
#endif
#endif
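/* Illustrative sketch (not part of the original file): building a view for a single pass
 * and handing it to a render graph. The graph, pass id and object are provided by the
 * caller. */
#if 0
void ExampleSubmitView(rt_render_graph *graph,
                       uint32_t pass_id,
                       unsigned int frame_id,
                       rt_render_object_type mesh_type,
                       const void *mesh_object) {
    rt_create_render_view_result res = rtCreateRenderView(&mesh_type, 1, frame_id);
    if (!res.ok)
        return;
    rtPushRenderObjectToView(&res.view, mesh_type, mesh_object);
    rtSubmitRenderView(res.view, graph, pass_id, frame_id);
}
#endif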

View File

@ -1,367 +0,0 @@
#ifndef RT_GFX_BACKEND_H
#define RT_GFX_BACKEND_H
/* Backend functions and types. */
#include <stddef.h>
#include "gfx.h"
#include "render_list.h"
#include "render_view.h"
#include "runtime/resources.h"
#include "runtime/rt_math.h"
#include "runtime/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Handles for backend objects */
#define RT_RENDER_BACKEND_HANDLE_MAX_INDEX ((1u << 24) - 1)
#define RT_RENDER_BACKEND_HANDLE_MAX_VERSION 255
#define RT_RENDER_BACKEND_HANDLE(name) \
typedef struct { \
uint32_t version : 8; \
uint32_t index : 24; \
} name
RT_RENDER_BACKEND_HANDLE(rt_pipeline_handle);
RT_RENDER_BACKEND_HANDLE(rt_render_target_handle);
RT_RENDER_BACKEND_HANDLE(rt_command_buffer_handle);
RT_RENDER_BACKEND_HANDLE(rt_gpu_semaphore_handle);
RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);
#undef RT_RENDER_BACKEND_HANDLE
#define RT_COMPARE_RENDER_HANDLES(_A, _B, _Comp) ((*(uint32_t *)&(_A)) _Comp (*(uint32_t *)&(_B)))
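/* Illustrative note (not part of the original file): every handle packs an 8 bit version
 * and a 24 bit index into 32 bits, so two handles can be compared through the macro above: */
#if 0
rt_buffer_handle a = {.version = 1, .index = 42};
rt_buffer_handle b = {.version = 1, .index = 42};
int same = RT_COMPARE_RENDER_HANDLES(a, b, ==); /* non-zero: same index and version */
#endif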
/* Init data for the renderer */
#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif
struct rt_renderer_init_info_s {
#ifdef _WIN32
struct HINSTANCE__ *hInstance;
struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
struct _XDisplay *display;
unsigned long window;
#endif
};
/* Argument types for render commands */
typedef enum {
RT_GRAPHICS_QUEUE,
RT_COMPUTE_QUEUE,
RT_TRANSFER_QUEUE,
} rt_gpu_queue;
#if 0
/* Attributes are used to bind buffers (or textures) to symbolic values.
* For example, an attribute might be bound to "CELL_GRID", which would be
* replaced with the (at the time of the invoke) grid buffer of the current
* world cell.
*/
typedef enum {
RT_ATTRIBUTE_VALUE_UNDEFINED,
RT_ATTRIBUTE_VALUE_MATERIAL_ALBEDO,
RT_ATTRIBUTE_VALUE_MATERIAL_NORMAL,
RT_ATTRIBUTE_VALUE_count
} rt_attribute_value;
typedef struct {
uint32_t index;
rt_attribute_value value;
} rt_attribute_binding;
#endif
typedef enum {
RT_SHADER_TYPE_INVALID,
RT_SHADER_TYPE_VULKAN,
RT_SHADER_TYPE_DX11,
RT_SHADER_TYPE_count,
} rt_shader_type;
typedef enum {
RT_SHADER_STAGE_VERTEX,
RT_SHADER_STAGE_FRAGMENT,
RT_SHADER_STAGE_COMPUTE,
RT_SHADER_STAGE_count,
} rt_shader_stage;
typedef struct {
rt_shader_type type;
rt_shader_stage stage;
rt_relptr bytecode;
size_t bytecode_length;
} rt_shader_info;
typedef struct {
rt_gpu_queue target_queue;
} rt_alloc_command_buffer_info;
typedef struct {
const rt_command_buffer_handle *command_buffers;
const rt_gpu_semaphore_handle *wait_semaphores;
const uint64_t *wait_values;
const rt_gpu_semaphore_handle *signal_semaphores;
const uint64_t *signal_values;
uint32_t command_buffer_count;
uint32_t wait_semaphore_count;
uint32_t signal_semaphore_count;
} rt_submit_command_buffers_info;
typedef struct {
/* Optional, for debug purposes */
const char *name;
uint64_t initial_value;
} rt_gpu_semaphore_info;
typedef enum {
RT_BUFFER_TYPE_VERTEX,
RT_BUFFER_TYPE_INDEX,
RT_BUFFER_TYPE_UNIFORM,
RT_BUFFER_TYPE_STORAGE,
RT_BUFFER_TYPE_count
} rt_buffer_type;
typedef enum {
/* Create once, never change the data. */
RT_BUFFER_USAGE_STATIC,
/* Update occasionally (after a number of frames) */
RT_BUFFER_USAGE_DYNAMIC,
/* Create, use once and then discard */
RT_BUFFER_USAGE_TRANSIENT,
RT_BUFFER_USAGE_count,
} rt_buffer_usage;
typedef struct {
size_t size;
rt_buffer_type type;
rt_buffer_usage usage;
const void *data;
} rt_buffer_info;
typedef enum {
RT_PASS_LOAD_MODE_LOAD,
RT_PASS_LOAD_MODE_CLEAR,
} rt_pass_load_mode;
typedef enum {
RT_PASS_WRITE_MODE_STORE,
RT_PASS_WRITE_MODE_DISCARD,
} rt_pass_write_mode;
typedef union {
rt_color color;
struct {
float depth;
int32_t stencil;
} depth_stencil;
} rt_pass_clear_value;
typedef struct {
float depth;
int32_t stencil;
} rt_depth_stencil_value;
typedef struct {
rt_render_target_handle color_buffers[4];
rt_pass_load_mode color_buffer_loads[4];
rt_pass_write_mode color_buffer_writes[4];
rt_pass_clear_value color_buffer_clear_values[4];
uint32_t color_buffer_count;
rt_render_target_handle depth_stencil_buffer;
rt_pass_load_mode depth_stencil_buffer_load;
rt_pass_write_mode depth_stencil_buffer_write;
rt_pass_clear_value depth_stencil_buffer_clear_value;
rt_rect2i render_area;
// For debug purposes, can be NULL
const char *name;
} rt_cmd_begin_pass_info;
typedef enum {
/* Unusable, must be transitioned to a usable state first. */
RT_RENDER_TARGET_STATE_INVALID,
/* Used as a color- or depth-buffer */
RT_RENDER_TARGET_STATE_ATTACHMENT,
RT_RENDER_TARGET_STATE_SAMPLED_IMAGE,
RT_RENDER_TARGET_STATE_STORAGE_IMAGE,
} rt_render_target_state;
#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0
/* Renderer API */
typedef struct rt_pipeline_info_s rt_pipeline_info;
typedef struct {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
} rt_attachment_info;
enum {
/* Bit 0 contains the type: 0 -> graphics, 1 -> compute */
RT_PASS_FLAG_GRAPHICS = 0x0000,
RT_PASS_FLAG_COMPUTE = 0x0001,
RT_PASS_FLAG_TYPE_MASK = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_GRAPHICS,
/* Always execute the pass, even if no objects will be rendered. */
RT_PASS_FLAG_EXECUTE_ALWAYS = 0x0002,
};
typedef struct {
const char *name;
uint32_t flags;
} rt_pass_info;
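/* Illustrative sketch (not part of the original file): bit 0 of the flags selects the
 * pass type, the remaining bits are modifiers. "ssao" is a placeholder pass name. */
#if 0
rt_pass_info example_pass = {.name = "ssao",
                             .flags = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_EXECUTE_ALWAYS};
int is_compute = (example_pass.flags & RT_PASS_FLAG_TYPE_MASK) == RT_PASS_FLAG_COMPUTE;
#endif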
typedef struct rt_render_graph_s rt_render_graph;
typedef rt_result rt_execute_render_pass_fn(uint32_t pass_id,
rt_command_buffer_handle cmdbuf,
const rt_render_view *views,
unsigned int view_count,
void *userdata);
typedef struct {
void *obj;
void (*AddRenderTarget)(void *obj, const rt_attachment_info *info);
void (*SetBackbuffer)(void *obj, const char *rt_name);
void (*AddRenderPass)(void *obj, const rt_pass_info *info);
void (*AddColorOutput)(void *obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_color clear_color);
void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
void (*SetDepthStencilAttachment)(void *obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_depth_stencil_value clear_value);
void (*SetRenderArea)(void *obj, const char *pass_name, rt_rect2 area, float min_depth, float max_depth);
void (*BindRenderPass)(void *obj,
const char *pass_name,
rt_execute_render_pass_fn *execute_fn,
void *userdata);
rt_result (*Build)(void *obj, rt_render_graph **p_render_graph);
} rt_render_graph_builder;
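/* Illustrative sketch (not part of the original file): how a caller might drive the builder
 * interface above. Pass and render target names are placeholders, error handling is omitted,
 * and using RT_RENDER_TARGET_SIZE_SWAPCHAIN for width/height is an assumption. */
#if 0
rt_render_graph *ExampleBuildGraph(rt_render_graph_builder b,
                                   rt_execute_render_pass_fn *execute_forward,
                                   void *userdata) {
    rt_attachment_info backbuffer = {.name = "backbuffer",
                                     .format = RT_PIXEL_FORMAT_SWAPCHAIN,
                                     .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
                                     .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
                                     .samples = 1,
                                     .layers = 1};
    b.AddRenderTarget(b.obj, &backbuffer);
    b.SetBackbuffer(b.obj, "backbuffer");

    rt_pass_info forward = {.name = "forward", .flags = RT_PASS_FLAG_GRAPHICS};
    b.AddRenderPass(b.obj, &forward);
    b.AddColorOutput(b.obj,
                     "forward",
                     "backbuffer",
                     RT_PASS_LOAD_MODE_CLEAR,
                     RT_PASS_WRITE_MODE_STORE,
                     (rt_color){.r = 0.f, .g = 0.f, .b = 0.f, .a = 1.f});
    b.BindRenderPass(b.obj, "forward", execute_forward, userdata);

    rt_render_graph *graph = NULL;
    b.Build(b.obj, &graph);
    return graph;
}
#endif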
typedef void rt_register_renderer_cvars_fn(void);
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
typedef void rt_shutdown_renderer_fn(void);
typedef unsigned int rt_get_max_frames_in_flight_fn(void);
typedef void rt_begin_frame_fn(unsigned int frame_id);
typedef void rt_end_frame_fn(unsigned int frame_id);
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers);
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info);
typedef rt_result
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph, unsigned int frame_id);
typedef void
rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view, unsigned int frame_id);
typedef void rt_reset_render_graph_fn(rt_render_graph *graph, unsigned int frame_id);
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
const rt_cmd_begin_pass_info *info);
typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf,
rt_render_target_handle render_target,
rt_render_target_state new_state);
typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf,
rt_render_target_handle render_target);
typedef void rt_cmd_bind_pipeline_fn(rt_command_buffer_handle cmd, rt_pipeline_handle pipeline);
typedef void rt_cmd_bind_vertex_buffers_fn(rt_command_buffer_handle cmd,
uint32_t first_binding,
uint32_t count,
const rt_buffer_handle *buffers,
const uint32_t *strides,
const uint32_t *offsets);
typedef void
rt_cmd_draw_fn(rt_command_buffer_handle cmdbuf, uint32_t first_vertex, uint32_t vertex_count);
typedef struct {
rt_register_renderer_cvars_fn *RegisterCVars;
rt_init_renderer_fn *Init;
rt_shutdown_renderer_fn *Shutdown;
rt_get_max_frames_in_flight_fn *GetMaxFramesInFlight;
rt_begin_frame_fn *BeginFrame;
rt_end_frame_fn *EndFrame;
rt_compile_pipeline_fn *CompilePipeline;
rt_destroy_pipeline_fn *DestroyPipeline;
rt_alloc_command_buffers_fn *AllocCommandBuffers;
rt_submit_command_buffers_fn *SubmitCommandBuffers;
rt_create_buffers_fn *CreateBuffers;
rt_destroy_buffers_fn *DestroyBuffers;
/*render graph functions*/
rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
rt_execute_render_graph_fn *ExecuteRenderGraph;
rt_submit_render_view_fn *SubmitRenderView;
rt_reset_render_graph_fn *ResetRenderGraph;
/* Command Buffer Functions */
rt_cmd_begin_pass_fn *CmdBeginPass;
rt_cmd_end_pass_fn *CmdEndPass;
rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget;
rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite;
rt_cmd_bind_pipeline_fn *CmdBindPipeline;
rt_cmd_bind_vertex_buffers_fn *CmdBindVertexBuffers;
rt_cmd_draw_fn *CmdDraw;
} rt_renderer_api;
#define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name
#ifndef RT_DONT_DEFINE_RENDERER_GLOBAL
extern rt_renderer_api g_renderer;
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,9 +1,3 @@
subdir('runtime')
subdir('asset_compiler')
subdir('gfx')
subdir('app_framework')
# Renderer libs
subdir('renderer/vk')
subdir('renderer/null')
subdir('renderer/dx11')

View File

@ -1,877 +0,0 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "gfx/effect.h"
#include "gfx/renderer_api.h"
#include "runtime/buffer_manager.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "common_render_graph.h"
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
#define MAX_SAMPLED_INPUTS_PER_PASS 8
typedef struct rt_render_target_build_info {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
uint32_t first_usage;
uint32_t last_usage;
} rt_render_target_build_info;
typedef struct rt_pass_build_info {
const char *name;
uint32_t flags;
void *userdata;
rt_execute_render_pass_fn *Execute;
rt_rect2 render_area;
float min_depth;
float max_depth;
uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_color color_attachment_clear_values[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_pass_load_mode color_attachment_loads[MAX_COLOR_ATTACHMENTS_PER_PASS];
rt_pass_write_mode color_attachment_writes[MAX_COLOR_ATTACHMENTS_PER_PASS];
uint32_t color_attachment_count;
uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
uint32_t sampled_input_count;
uint32_t depth_stencil_attachment;
rt_depth_stencil_value depth_stencil_clear_value;
rt_pass_load_mode depth_stencil_load;
rt_pass_write_mode depth_stencil_write;
uint32_t *dependencies;
uint32_t dependency_count;
} rt_pass_build_info;
typedef struct {
uint32_t signaled_by;
uint32_t waited_on_by;
} rt_sync_point_build_info;
typedef struct rt_render_graph_builder_obj {
rt_arena arena;
rt_render_target_build_info *render_targets;
uint32_t render_target_count;
uint32_t render_target_capacity;
rt_pass_build_info *passes;
uint32_t pass_count;
uint32_t pass_capacity;
rt_physical_render_target_info *phys_render_targets;
uint32_t phys_render_target_count;
rt_sync_point_build_info *sync_points;
uint32_t sync_point_count;
uint32_t backbuffer;
rt_render_graph_builder_platform_callbacks platform_cbs;
} rt_render_graph_builder_obj;
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
rt_render_graph_builder_obj *obj = _obj;
if (obj->render_target_count == obj->render_target_capacity) {
uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
rt_render_target_build_info *tmp =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_render_target_build_info, new_cap);
if (obj->render_target_capacity)
memcpy(tmp,
obj->render_targets,
sizeof(rt_render_target_build_info) * obj->render_target_capacity);
obj->render_targets = tmp;
obj->render_target_capacity = new_cap;
}
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
strcpy(name, info->name);
obj->render_targets[obj->render_target_count].name = name;
obj->render_targets[obj->render_target_count].format = info->format;
obj->render_targets[obj->render_target_count].width = info->width;
obj->render_targets[obj->render_target_count].height = info->height;
obj->render_targets[obj->render_target_count].samples = info->samples;
obj->render_targets[obj->render_target_count].layers = info->layers;
obj->render_targets[obj->render_target_count].first_usage = 0;
obj->render_targets[obj->render_target_count].last_usage = 0;
++obj->render_target_count;
}
static void SetBackbuffer(void *_obj, const char *rt_name) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
obj->backbuffer = i;
return;
}
}
rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
}
static void AddRenderPass(void *_obj, const rt_pass_info *info) {
rt_render_graph_builder_obj *obj = _obj;
if (obj->pass_count == obj->pass_capacity) {
uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
rt_pass_build_info *tmp =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_pass_build_info, new_cap);
if (obj->pass_capacity)
memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
obj->passes = tmp;
obj->pass_capacity = new_cap;
}
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
strcpy(name, info->name);
obj->passes[obj->pass_count].name = name;
obj->passes[obj->pass_count].flags = info->flags;
obj->passes[obj->pass_count].color_attachment_count = 0;
obj->passes[obj->pass_count].sampled_input_count = 0;
obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
obj->passes[obj->pass_count].dependencies = NULL;
obj->passes[obj->pass_count].dependency_count = 0;
++obj->pass_count;
}
static void AddColorOutput(void *_obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_color clear_color) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as color output to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
rtLog("ren", "Too many color attachments in pass %s", pass_name);
return;
}
obj->passes[i].color_attachment_clear_values[obj->passes[i].color_attachment_count] =
clear_color;
obj->passes[i].color_attachment_loads[obj->passes[i].color_attachment_count] = load;
obj->passes[i].color_attachment_writes[obj->passes[i].color_attachment_count] = write;
obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
return;
}
}
rtLog("ren",
"Tried to add render target %s as color output to unknown render target %s",
rt_name,
pass_name);
}
static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as color output to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
rtLog("ren", "Too many sampled inputs in pass %s", pass_name);
return;
}
obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
return;
}
}
rtLog("ren",
"Tried to add render target %s as sampled input to unknown render target %s",
rt_name,
pass_name);
}
static void SetDepthStencilAttachment(void *_obj,
const char *pass_name,
const char *rt_name,
rt_pass_load_mode load,
rt_pass_write_mode write,
rt_depth_stencil_value clear_value) {
uint32_t rt_index = UINT_MAX;
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
rt_index = i;
break;
}
}
if (rt_index == UINT_MAX) {
rtLog("ren",
"Tried to add unknown render target %s as depth stencil attachment to %s",
rt_name,
pass_name);
return;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].depth_stencil_attachment = rt_index;
obj->passes[i].depth_stencil_clear_value = clear_value;
obj->passes[i].depth_stencil_load = load;
obj->passes[i].depth_stencil_write = write;
return;
}
}
rtLog("ren",
"Tried to add render target %s as depth stencil attachment to unknown render target %s",
rt_name,
pass_name);
}
static void SetRenderArea(void *_obj, const char *pass_name, rt_rect2 area, float min_depth, float max_depth) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].render_area = area;
obj->passes[i].min_depth = min_depth;
obj->passes[i].max_depth = max_depth;
return;
}
}
rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
}
static void BindRenderPass(void *_obj,
const char *pass_name,
rt_execute_render_pass_fn *execute_fn,
void *userdata) {
rt_render_graph_builder_obj *obj = _obj;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (strcmp(obj->passes[i].name, pass_name) == 0) {
obj->passes[i].Execute = execute_fn;
obj->passes[i].userdata = userdata;
return;
}
}
rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
}
typedef struct {
uint32_t added;
uint32_t moved;
} rt_find_writers_result;
static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
uint32_t rt_index,
uint32_t append_at,
uint32_t *p_passes) {
rt_find_writers_result res = {0, 0};
for (uint32_t i = 0; i < obj->pass_count; ++i) {
bool writes_rt = false;
if (obj->passes[i].depth_stencil_attachment == rt_index) {
writes_rt = true;
} else {
for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
if (obj->passes[i].color_attachments[j] == rt_index) {
writes_rt = true;
}
}
}
if (!writes_rt)
continue;
uint32_t lower_index = UINT32_MAX;
for (uint32_t j = 0; j < append_at; ++j) {
if (p_passes[j] == i) {
lower_index = j;
break;
}
}
if (lower_index == UINT32_MAX) {
p_passes[append_at++] = i;
res.added++;
} else {
memmove(&p_passes[lower_index],
&p_passes[lower_index + 1],
(append_at - lower_index - 1) * sizeof(uint32_t));
p_passes[append_at - 1] = i;
res.moved++;
}
}
return res;
}
static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
uint32_t search_rt,
uint32_t append_at,
uint32_t *p_order) {
rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
uint32_t new_append = append_at + writers.added;
for (uint32_t i = 0; i < writers.moved; ++i) {
uint32_t pass_idx = p_order[append_at - writers.moved + i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
}
}
for (uint32_t i = 0; i < writers.added; ++i) {
uint32_t pass_idx = p_order[append_at + i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
}
}
return new_append;
}
static rt_result
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
if (!order)
return RT_OUT_OF_MEMORY;
uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
/* Now the pass writing the backbuffer is first; we need to reverse the order */
for (uint32_t i = 0; i < count / 2; ++i) {
uint32_t t = order[i];
order[i] = order[count - i - 1];
order[count - i - 1] = t;
}
*p_order = order;
*p_count = count;
return RT_SUCCESS;
}
static uint32_t *
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
/* Our goal is to calculate a schedule that:
* A) Does not break the dependency chain
* B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
* This means that if pass A depends on pass B, we want to have as many passes in between as
* possible, to reduce the likelihood of stalls caused by A waiting for B to finish. */
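/* Illustrative example (not in the original comment): with naive order [A, B, C] where C
 * samples a target written by A, pass A and the independent pass B both score UINT32_MAX
 * (no dependencies), while C only scores scheduled_count - k = 1 once A is scheduled.
 * The loop below therefore emits A, then B, then C, placing B between the dependency and
 * its consumer. */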
uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
if (!schedule)
return NULL;
uint32_t scheduled_count = 0;
while (scheduled_count < pass_count) {
/* The number of passes remaining in naive_order */
uint32_t unscheduled_count = pass_count - scheduled_count;
/* It is always valid to use the front */
uint32_t selected_idx = 0;
uint32_t selected_score = 0;
for (uint32_t i = 0; i < unscheduled_count; ++i) {
/* Check if any dependency is not scheduled yet */
uint32_t pass_idx = naive_order[i];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
uint32_t score = 0;
bool is_valid = true;
if (pass->dependency_count) {
for (uint32_t j = 0; j < unscheduled_count; ++j) {
uint32_t pass2_idx = naive_order[j];
for (uint32_t k = 0; k < pass->dependency_count; ++k) {
if (pass->dependencies[k] == pass2_idx) {
is_valid = false;
break;
}
}
if (!is_valid)
break;
}
if (!is_valid)
continue;
for (uint32_t j = 0; j < pass->dependency_count; ++j) {
for (uint32_t k = 0; k < scheduled_count; ++k) {
if (schedule[k] == pass->dependencies[j]) {
score += scheduled_count - k;
break;
}
}
}
} else {
score = UINT32_MAX;
}
if (score > selected_score) {
selected_score = score;
selected_idx = i;
}
}
schedule[scheduled_count++] = naive_order[selected_idx];
memmove(&naive_order[selected_idx],
&naive_order[selected_idx + 1],
(unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
}
return schedule;
}
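/* Worked example of the scheduling heuristic (illustrative, not from the original code):
 * given the naive order { A, B, C, D } where only D depends on A, passes with no unscheduled
 * dependencies score UINT32_MAX, while D scores by the distance of A from the end of the
 * current schedule. A is scheduled first, then B and C (score UINT32_MAX) are preferred over
 * D, so the result is { A, B, C, D } with the largest possible gap between A and its
 * dependent D, which maximizes the potential for GPU overlap. */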
static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so it may be possible to combine
     * the two. */
for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
rt_pass_build_info *pass = &obj->passes[pass_idx];
uint32_t dependency_capacity = pass->sampled_input_count;
if (dependency_capacity) {
pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
if (!pass->dependencies)
return RT_OUT_OF_MEMORY;
}
for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
uint32_t rt_index = pass->sampled_inputs[input_idx];
for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
bool is_dependency = false;
if (candidate->depth_stencil_attachment == rt_index)
is_dependency = true;
for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
if (candidate->color_attachments[j] == rt_index)
is_dependency = true;
}
if (!is_dependency)
continue;
if (pass->dependency_count == dependency_capacity) {
/* The dependencies are still on top of the arena, so we can just grow that
* array */
if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
return RT_OUT_OF_MEMORY;
dependency_capacity *= 2;
}
pass->dependencies[pass->dependency_count++] = candidate_idx;
}
}
}
return RT_SUCCESS;
}
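/* Sketch of the arena-growth trick used above (the exact arena internals are an assumption,
 * but the pattern follows the in-code comment): pass->dependencies is the most recent
 * allocation on obj->arena, so pushing another `dependency_capacity` elements extends the
 * same contiguous block:
 *
 *   uint32_t *deps = RT_ARENA_PUSH_ARRAY(&arena, uint32_t, 4);  // [d0 d1 d2 d3]
 *   RT_ARENA_PUSH_ARRAY(&arena, uint32_t, 4);                   // [d0 d1 d2 d3 .. .. .. ..]
 *
 * This only holds while nothing else has been pushed onto the arena in between. */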
static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
uint32_t pass_count,
const uint32_t *schedule) {
for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
rt->first_usage = UINT32_MAX;
rt->last_usage = 0;
for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
uint32_t pass_idx = schedule[sched_idx];
const rt_pass_build_info *pass = &obj->passes[pass_idx];
bool usage = pass->depth_stencil_attachment == rt_idx;
if (!usage) {
for (unsigned int i = 0; i < pass->color_attachment_count; ++i) {
if (pass->color_attachments[i] == rt_idx)
usage = true;
}
}
if (!usage) {
for (unsigned int i = 0; i < pass->sampled_input_count; ++i) {
if (pass->sampled_inputs[i] == rt_idx)
usage = true;
}
}
if (usage) {
if (sched_idx < rt->first_usage)
rt->first_usage = sched_idx;
if (sched_idx > rt->last_usage)
rt->last_usage = sched_idx;
}
}
}
}
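/* Example of the computed lifetimes (illustrative; names are made up): with the schedule
 * { gbuffer, lighting, post } and a "normals" target that is written by gbuffer (slot 0) and
 * sampled by lighting (slot 1), the loop above yields first_usage = 0 and last_usage = 1,
 * i.e. the inclusive range of schedule slots during which the target must stay alive. */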
static rt_result GreedyMergeRenderTargets(rt_render_graph_builder_obj *obj) {
typedef struct {
rt_physical_render_target_info info;
int alive;
int backbuffer;
uint32_t first_usage;
uint32_t last_usage;
} merged_rts;
merged_rts *merged = RT_ARENA_PUSH_ARRAY(&obj->arena, merged_rts, 2 * obj->render_target_count);
if (!merged) {
return RT_OUT_OF_MEMORY;
}
uint32_t candidate_count = obj->render_target_count;
for (uint32_t i = 0; i < candidate_count; ++i) {
merged[i].alive = 1;
merged[i].backbuffer = (i == obj->backbuffer);
merged[i].info.format = obj->render_targets[i].format;
merged[i].info.width = obj->render_targets[i].width;
merged[i].info.height = obj->render_targets[i].height;
merged[i].info.layers = obj->render_targets[i].layers;
merged[i].info.name = obj->render_targets[i].name;
merged[i].info.samples = obj->render_targets[i].samples;
merged[i].first_usage = obj->render_targets[i].first_usage;
merged[i].last_usage = obj->render_targets[i].last_usage;
}
uint32_t *rt_mapping =
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->render_target_count);
if (!rt_mapping)
return RT_OUT_OF_MEMORY;
for (uint32_t i = 0; i < obj->render_target_count; ++i)
rt_mapping[i] = i;
bool did_merge;
do {
did_merge = false;
for (uint32_t first = 0; first < candidate_count - 1; ++first) {
if (!merged[first].alive)
continue;
for (uint32_t second = first + 1; second < candidate_count; ++second) {
if (!merged[second].alive)
continue;
if (!((merged[first].last_usage < merged[second].first_usage) ||
(merged[second].last_usage < merged[first].first_usage)))
continue;
if (!(merged[first].info.width == merged[second].info.width &&
merged[first].info.height == merged[second].info.height &&
merged[first].info.samples == merged[second].info.samples &&
merged[first].info.layers == merged[second].info.layers &&
merged[first].info.format == merged[second].info.format))
continue;
merged[first].alive = 0;
merged[second].alive = 0;
merged_rts combined = {
.alive = 1,
.backbuffer = merged[first].backbuffer || merged[second].backbuffer,
.first_usage = RT_MIN(merged[first].first_usage, merged[second].first_usage),
.last_usage = RT_MAX(merged[first].last_usage, merged[second].last_usage),
.info = merged[first].info,
};
char *combined_name = rtArenaPush(&obj->arena,
strlen(merged[first].info.name) +
strlen(merged[second].info.name) + 2);
if (!combined_name)
return RT_OUT_OF_MEMORY;
strcpy(combined_name, merged[first].info.name);
strcat(combined_name, "+");
strcat(combined_name, merged[second].info.name);
combined.info.name = combined_name;
                /* Update mappings. If index < render_target_count, then it refers to a
                 * logical render target. Otherwise it refers to a merged render target. */
if (first < obj->render_target_count) {
rt_mapping[first] = candidate_count;
} else {
// Find mappings that refer to this index and update them
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (rt_mapping[i] == first)
rt_mapping[i] = candidate_count;
}
}
if (second < obj->render_target_count) {
rt_mapping[second] = candidate_count;
} else {
// Find mappings that refer to this index and update them
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
if (rt_mapping[i] == second)
rt_mapping[i] = candidate_count;
}
}
RT_ASSERT(candidate_count < 2 * obj->render_target_count, "");
merged[candidate_count++] = combined;
did_merge = true;
break;
}
if (did_merge)
break;
}
} while (did_merge);
uint32_t phys_count = 0;
for (uint32_t i = 0; i < candidate_count; ++i) {
if (merged[i].alive)
++phys_count;
}
obj->phys_render_targets =
RT_ARENA_PUSH_ARRAY(&obj->arena, rt_physical_render_target_info, phys_count);
if (!obj->phys_render_targets)
return RT_OUT_OF_MEMORY;
obj->phys_render_target_count = 0;
for (uint32_t i = 0; i < candidate_count; ++i) {
if (merged[i].alive) {
uint32_t index = obj->phys_render_target_count;
if (merged[i].backbuffer)
obj->backbuffer = obj->phys_render_target_count;
obj->phys_render_targets[obj->phys_render_target_count++] = merged[i].info;
/* Update the mapping table */
for (uint32_t j = 0; j < obj->render_target_count; ++j) {
if (rt_mapping[j] == i)
rt_mapping[j] = index;
}
}
}
/* Update pass render target references */
for (uint32_t i = 0; i < obj->pass_count; ++i) {
if (obj->passes[i].depth_stencil_attachment < UINT_MAX)
obj->passes[i].depth_stencil_attachment =
rt_mapping[obj->passes[i].depth_stencil_attachment];
for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j)
obj->passes[i].color_attachments[j] = rt_mapping[obj->passes[i].color_attachments[j]];
for (uint32_t j = 0; j < obj->passes[i].sampled_input_count; ++j)
obj->passes[i].sampled_inputs[j] = rt_mapping[obj->passes[i].sampled_inputs[j]];
}
obj->backbuffer = rt_mapping[obj->backbuffer];
return RT_SUCCESS;
}
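/* Illustrative merge (names and sizes are made up): two 1920x1080 RGBA8 targets "ssao_tmp"
 * (alive in slots 0..1) and "bloom_tmp" (alive in slots 2..3) have disjoint lifetimes and
 * identical dimensions, format, samples and layers, so they collapse into one physical target
 * named "ssao_tmp+bloom_tmp" alive in slots 0..3. rt_mapping then points both logical indices
 * at that single physical index, which is what the pass fix-up loop above consumes. */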
static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
RT_ASSERT(false, "Not implemented yet");
return RT_UNKNOWN_ERROR;
}
static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
const uint32_t *schedule) {
uint32_t *execution_levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
if (!execution_levels)
return NULL;
const rt_pass_build_info *passes = obj->passes;
uint32_t pass_count = obj->pass_count;
for (uint32_t i = 0; i < pass_count; ++i) {
uint32_t level = 0;
uint32_t pass_idx = schedule[i];
for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
uint32_t dep_idx = passes[pass_idx].dependencies[j];
level = RT_MAX(execution_levels[dep_idx] + 1, level);
}
execution_levels[pass_idx] = level;
}
return execution_levels;
}
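/* Example of execution levels (hypothetical passes): "shadow" and "gbuffer" have no
 * dependencies and both get level 0, so they can be recorded in parallel; "lighting", which
 * depends on both, gets level max(0, 0) + 1 = 1; a "post" pass depending on "lighting" gets
 * level 2. All passes of level N can be recorded concurrently once level N-1 has finished. */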
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj,
const uint32_t *order,
const uint32_t *execution_levels) {
size_t runtime_data_size = obj->platform_cbs.GetRuntimeDataSize();
size_t required_size = sizeof(rt_render_graph);
required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
required_size += obj->pass_count * sizeof(rt_render_pass);
required_size += obj->pass_count * runtime_data_size;
size_t pass_attachment_size = 0;
for (uint32_t i = 0; i < obj->pass_count; ++i) {
required_size += strlen(obj->passes[i].name) + 1;
pass_attachment_size += obj->passes[i].color_attachment_count *
(sizeof(rt_render_target_handle) + sizeof(rt_color) +
sizeof(rt_pass_load_mode) + sizeof(rt_pass_write_mode));
pass_attachment_size +=
obj->passes[i].sampled_input_count * sizeof(rt_render_target_handle);
}
required_size += pass_attachment_size;
rt_render_graph *graph = rtAllocBuffer(required_size);
if (!graph)
return NULL;
memset(graph, 0, required_size);
graph->render_targets = (rt_render_target_handle *)(graph + 1);
graph->semaphores =
(rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
char *attachment_storage = (char *)(graph->passes + obj->pass_count);
char *runtime_data = attachment_storage + pass_attachment_size;
char *names = runtime_data + runtime_data_size * obj->pass_count;
char *next_name = names;
graph->render_target_count = obj->phys_render_target_count;
graph->semaphore_count = obj->sync_point_count;
graph->pass_count = obj->pass_count;
for (uint32_t i = 0; i < obj->phys_render_target_count; ++i) {
graph->render_targets[i] =
obj->platform_cbs.CreateRenderTarget(&obj->phys_render_targets[i]);
}
for (uint32_t i = 0; i < obj->sync_point_count; ++i) {
// TODO
RT_NOT_IMPLEMENTED;
}
for (uint32_t i = 0; i < obj->pass_count; ++i) {
uint32_t passidx = order[i];
size_t namelen = strlen(obj->passes[passidx].name);
graph->passes[i].Execute = RT_VERIFY(obj->passes[passidx].Execute);
graph->passes[i].user_data = obj->passes[passidx].userdata;
graph->passes[i].flags = obj->passes[passidx].flags;
graph->passes[i].id = rtCalculateRenderPassID(obj->passes[passidx].name, namelen);
graph->passes[i].first_signal = 0;
graph->passes[i].signal_count = 0;
graph->passes[i].first_wait = 0;
graph->passes[i].wait_count = 0;
graph->passes[i].execution_level = execution_levels[passidx];
graph->passes[i].render_area = obj->passes[passidx].render_area;
graph->passes[i].min_depth = obj->passes[passidx].min_depth;
graph->passes[i].max_depth = obj->passes[passidx].max_depth;
        graph->passes[i].depth_stencil =
            (obj->passes[passidx].depth_stencil_attachment != UINT_MAX)
                ? graph->render_targets[obj->passes[passidx].depth_stencil_attachment]
                : (rt_render_target_handle)RT_INVALID_HANDLE;
        graph->passes[i].depth_stencil_clear_value =
            obj->passes[passidx].depth_stencil_clear_value;
        graph->passes[i].depth_stencil_load = obj->passes[passidx].depth_stencil_load;
        graph->passes[i].depth_stencil_write = obj->passes[passidx].depth_stencil_write;
        graph->passes[i].color_output_count = obj->passes[passidx].color_attachment_count;
if (graph->passes[i].color_output_count) {
graph->passes[i].color_outputs = (rt_render_target_handle *)attachment_storage;
attachment_storage +=
sizeof(rt_render_target_handle) * graph->passes[i].color_output_count;
graph->passes[i].color_clear_values = (rt_color *)attachment_storage;
attachment_storage += sizeof(rt_color) * graph->passes[i].color_output_count;
graph->passes[i].color_loads = (rt_pass_load_mode *)attachment_storage;
attachment_storage += sizeof(rt_pass_load_mode) * graph->passes[i].color_output_count;
graph->passes[i].color_writes = (rt_pass_write_mode *)attachment_storage;
attachment_storage += sizeof(rt_pass_write_mode) * graph->passes[i].color_output_count;
for (uint32_t j = 0; j < graph->passes[i].color_output_count; ++j) {
                graph->passes[i].color_outputs[j] =
                    graph->render_targets[obj->passes[passidx].color_attachments[j]];
                graph->passes[i].color_clear_values[j] =
                    obj->passes[passidx].color_attachment_clear_values[j];
                graph->passes[i].color_loads[j] = obj->passes[passidx].color_attachment_loads[j];
                graph->passes[i].color_writes[j] = obj->passes[passidx].color_attachment_writes[j];
}
}
        graph->passes[i].sampled_input_count = obj->passes[passidx].sampled_input_count;
if (graph->passes[i].sampled_input_count) {
graph->passes[i].sampled_inputs = (rt_render_target_handle *)attachment_storage;
attachment_storage +=
sizeof(rt_render_target_handle) * graph->passes[i].sampled_input_count;
for (uint32_t j = 0; j < graph->passes[i].sampled_input_count; ++j) {
                graph->passes[i].sampled_inputs[j] =
                    graph->render_targets[obj->passes[passidx].sampled_inputs[j]];
}
}
graph->passes[i].runtime_data = (void *)(runtime_data + i * runtime_data_size);
graph->passes[i].name = next_name;
next_name += namelen + 1;
memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1);
}
graph->backbuffer_index = obj->backbuffer;
return graph;
}
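/* Layout of the single allocation produced above (sizes depend on the build info):
 *
 *   [rt_render_graph][render_targets][semaphores][passes]
 *   [per-pass attachment arrays: outputs, clear values, loads, writes, sampled inputs]
 *   [per-pass runtime data][pass name strings]
 *
 * Everything lives in one rtAllocBuffer block, so the whole graph can be released at once. */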
static rt_result Build(void *_obj, rt_render_graph **p_graph) {
rt_render_graph_builder_obj *obj = _obj;
uint32_t *naive_order;
uint32_t pass_count;
rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
if (res != RT_SUCCESS)
return res;
res = DeterminePassDependencies(obj);
if (res != RT_SUCCESS)
return res;
uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
if (!optimized_order)
return RT_OUT_OF_MEMORY;
DetermineRenderTargetUsage(obj, pass_count, optimized_order);
res = GreedyMergeRenderTargets(obj);
if (res != RT_SUCCESS)
return res;
uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
if (!execution_levels)
return RT_OUT_OF_MEMORY;
if (obj->platform_cbs.RequireExplicitSynchronization()) {
res = CreateSynchronizationPoints(obj);
if (res != RT_SUCCESS)
return res;
} else {
obj->sync_point_count = 0;
}
*p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
}
rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs) {
// TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions
rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
RT_ASSERT(obj, "Failed to allocate the builder object.");
memset(obj, 0, sizeof(*obj));
rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
RT_ASSERT(arena_res.ok, "");
obj->arena = arena_res.arena;
obj->platform_cbs = *platform_cbs;
return (rt_render_graph_builder){
.obj = obj,
.AddRenderTarget = AddRenderTarget,
.SetBackbuffer = SetBackbuffer,
.AddRenderPass = AddRenderPass,
.AddColorOutput = AddColorOutput,
.AddSampledInput = AddSampledInput,
.SetDepthStencilAttachment = SetDepthStencilAttachment,
.SetRenderArea = SetRenderArea,
.BindRenderPass = BindRenderPass,
.Build = Build,
};
}
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder) {
rt_render_graph_builder_obj *obj = builder->obj;
rtReleaseArena(&obj->arena);
free(obj);
memset(builder, 0, sizeof(*builder));
}
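/* Minimal usage sketch (an illustration, not from the original code; only the BindRenderPass and
 * Build signatures are taken from this file, the rest of the call sequence is assumed):
 *
 *   rt_render_graph_builder b = rtCreateRenderGraphBuilder(&platform_cbs);
 *   // declare render targets and passes via b.AddRenderTarget / b.AddRenderPass /
 *   // b.AddColorOutput / b.AddSampledInput, mark the final target with b.SetBackbuffer,
 *   // then bind the execution callback and build:
 *   b.BindRenderPass(b.obj, "forward", MyExecuteForwardPass, my_userdata);
 *   rt_render_graph *graph = NULL;
 *   rt_result res = b.Build(b.obj, &graph);
 *   rtDestroyRenderGraphBuilder(&b);
 *
 * "forward", MyExecuteForwardPass and my_userdata are placeholder names. */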

View File

@ -1,99 +0,0 @@
#ifndef RT_RENDERER_COMMON_RENDER_GRAPH_H
#define RT_RENDERER_COMMON_RENDER_GRAPH_H
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
typedef struct {
const char *name;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int samples;
unsigned int layers;
} rt_physical_render_target_info;
typedef rt_render_target_handle
rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info);
typedef int rt_rgb_require_explicit_synchronization_fn(void);
typedef size_t rt_rgb_get_runtime_data_size_fn(void);
typedef struct {
rt_rgb_create_render_target_fn *CreateRenderTarget;
rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization;
rt_rgb_get_runtime_data_size_fn *GetRuntimeDataSize;
} rt_render_graph_builder_platform_callbacks;
typedef struct {
uint32_t flags;
/* Used for cheap referencing */
uint32_t id;
/* Used for debug output */
const char *name;
/* Viewport info */
rt_rect2 render_area;
float min_depth;
float max_depth;
/* Render targets */
rt_render_target_handle *color_outputs;
rt_color *color_clear_values;
rt_pass_load_mode *color_loads;
rt_pass_write_mode *color_writes;
uint32_t color_output_count;
rt_render_target_handle depth_stencil;
rt_depth_stencil_value depth_stencil_clear_value;
rt_pass_load_mode depth_stencil_load;
rt_pass_write_mode depth_stencil_write;
rt_render_target_handle *sampled_inputs;
uint32_t sampled_input_count;
/* Used for parallelisation on the CPU-side. All passes with execution level N can
* be recorded in parallel, after passes with level N-1 have finished. */
uint32_t execution_level;
/* GFX layer function for executing the pass */
rt_execute_render_pass_fn *Execute;
void *user_data;
/* Allocated by the backend, used during runtime */
void *runtime_data;
/* These refer to the semaphores array */
uint32_t first_wait;
uint32_t wait_count;
uint32_t first_signal;
uint32_t signal_count;
} rt_render_pass;
struct rt_render_graph_s {
rt_render_target_handle *render_targets;
uint32_t render_target_count;
rt_gpu_semaphore_handle *semaphores;
uint32_t semaphore_count;
rt_render_pass *passes;
uint32_t pass_count;
uint32_t backbuffer_index;
};
#ifdef __cplusplus
extern "C" {
#endif
rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs);
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,146 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxBuffers,
"Maximum number of simultaneously existing buffers. Default: 4096",
4096);
static rt_buffer *_buffers;
static rt_buffer *_first_free;
static rt_mutex *_lock;
rt_result InitBufferManagement() {
_buffers =
reinterpret_cast<rt_buffer *>(calloc((size_t)rt_Dx11MaxBuffers.i, sizeof(rt_buffer)));
if (!_buffers) {
return RT_OUT_OF_MEMORY;
}
_lock = rtCreateMutex();
if (!_lock) {
free(_buffers);
return RT_UNKNOWN_ERROR;
}
_first_free = _buffers + 2;
    /* The last slot keeps the NULL next_free it got from calloc, terminating the free list. */
    for (int i = 0; i < rt_Dx11MaxBuffers.i - 1; ++i) {
        _buffers[i].next_free = &_buffers[i + 1];
    }
return RT_SUCCESS;
}
void ShutdownBufferManagement() {
for (int i = 0; i < rt_Dx11MaxBuffers.i; ++i) {
if (_buffers[i].buffer)
_buffers[i].buffer->Release();
}
free(_buffers);
rtDestroyMutex(_lock);
}
rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxBuffers.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _buffers[handle.index].version)
return nullptr;
return &_buffers[handle.index];
}
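/* Note on the handle scheme used throughout this backend (descriptive comment, added for
 * clarity): a handle stores { version, index }. A slot's version is advanced whenever the
 * slot is (re)used for a new buffer, so a stale handle whose version no longer matches the
 * slot is rejected above and yields nullptr instead of aliasing a recycled buffer. */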
extern "C" rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_buffer *slot = _first_free;
if (slot)
_first_free = slot->next_free;
rtUnlockMutex(_lock);
if (!slot) {
rtLog("dx11", "Failed to allocate a command buffer slot.");
rtLockMutex(_lock);
            for (uint32_t j = 0; j < i; ++j) {
                rt_buffer *s = &_buffers[p_buffers[j].index];
                s->next_free = _first_free;
                _first_free = s;
            }
rtUnlockMutex(_lock);
return RT_OUT_OF_MEMORY;
}
D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
if (info[i].usage == RT_BUFFER_USAGE_STATIC) {
usage = D3D11_USAGE_IMMUTABLE;
} else if (info[i].usage == RT_BUFFER_USAGE_DYNAMIC) {
usage = D3D11_USAGE_DEFAULT;
} else if (info[i].usage == RT_BUFFER_USAGE_TRANSIENT) {
usage = D3D11_USAGE_DYNAMIC;
}
UINT bind_flags = D3D11_BIND_UNORDERED_ACCESS;
if (info[i].type == RT_BUFFER_TYPE_VERTEX)
bind_flags = D3D11_BIND_VERTEX_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_INDEX)
bind_flags = D3D11_BIND_INDEX_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_UNIFORM)
bind_flags = D3D11_BIND_CONSTANT_BUFFER;
else if (info[i].type == RT_BUFFER_TYPE_STORAGE)
bind_flags = D3D11_BIND_UNORDERED_ACCESS;
D3D11_BUFFER_DESC desc = {};
desc.ByteWidth = static_cast<UINT>(((info[i].size + 15) / 16) * 16);
desc.Usage = usage;
desc.BindFlags = bind_flags;
desc.CPUAccessFlags = 0;
desc.MiscFlags = 0;
desc.StructureByteStride = 1;
D3D11_SUBRESOURCE_DATA data;
        data.pSysMem = info[i].data;
data.SysMemPitch = 0;
data.SysMemSlicePitch = 0;
if (FAILED(
g_gpu.device->CreateBuffer(&desc, info[i].data ? &data : nullptr, &slot->buffer))) {
rtLog("dx11", "Failed to create a deferred context.");
auto lock_guard = rtAutoLock(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_buffer *s = &_buffers[p_buffers[j].index];
s->next_free = _first_free;
_first_free = s;
}
return RT_UNKNOWN_ERROR;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
const uint32_t index = (uint32_t)(slot - _buffers);
p_buffers[i].version = slot->version;
p_buffers[i].index = index;
}
return RT_SUCCESS;
}
extern "C" void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
for (uint32_t i = 0; i < count; ++i) {
if (!RT_IS_HANDLE_VALID(buffers[i]) || (int)buffers[i].index >= rt_Dx11MaxBuffers.i)
continue;
auto lg = rtAutoLock(_lock);
if (buffers[i].version != _buffers[buffers[i].index].version)
continue;
_buffers[buffers[i].index].buffer->Release();
_buffers[buffers[i].index].next_free = _first_free;
_first_free = &_buffers[buffers[i].index];
}
}

View File

@ -1,148 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxCommandBuffers,
"Maximum number of simultaneously created command buffers. Default: 1024",
1024);
static rt_command_buffer *_buffers;
static rt_command_buffer *_first_free;
static rt_mutex *_lock;
rt_result InitCommandBufferManagement() {
_buffers = reinterpret_cast<rt_command_buffer *>(
calloc((size_t)rt_Dx11MaxCommandBuffers.i, sizeof(rt_command_buffer)));
if (!_buffers)
return RT_OUT_OF_MEMORY;
_first_free = &_buffers[1];
_lock = rtCreateMutex();
if (!_lock) {
free(_buffers);
return RT_UNKNOWN_ERROR;
}
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i - 1; ++i) {
_buffers[i].next_free = &_buffers[i + 1];
}
return RT_SUCCESS;
}
void ShutdownCommandBufferManagement() {
for (int i = 0; i < rt_Dx11MaxCommandBuffers.i; ++i) {
if (_buffers[i].context)
_buffers[i].context->Release();
}
free(_buffers);
_buffers = nullptr;
}
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_command_buffer *slot = _first_free;
if (slot)
_first_free = slot->next_free;
rtUnlockMutex(_lock);
if (!slot) {
rtLog("dx11", "Failed to allocate a command buffer slot.");
rtLockMutex(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_command_buffer *s = &_buffers[p_handles[j].index];
s->next_free = _first_free;
_first_free = s;
}
rtUnlockMutex(_lock);
return RT_OUT_OF_MEMORY;
}
if (!slot->context) {
if (FAILED(g_gpu.device->CreateDeferredContext1(0, &slot->context))) {
rtLog("dx11", "Failed to create a deferred context.");
auto lock_guard = rtAutoLock(_lock);
for (uint32_t j = 0; j < i; ++j) {
rt_command_buffer *s = &_buffers[p_handles[j].index];
s->next_free = _first_free;
_first_free = s;
}
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
if (FAILED(slot->context->QueryInterface(IID_PPV_ARGS(&slot->annotation)))) {
rtLog("dx11", "Failed to retrieve the annotation interface.");
slot->annotation = nullptr;
}
#endif
} else {
slot->context->ClearState();
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
const uint32_t index = (uint32_t)(slot - _buffers);
p_handles[i].version = slot->version;
p_handles[i].index = index;
}
return RT_SUCCESS;
}
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles) {
// TODO: Handle semaphores
// Submit the command lists to the gpu
for (uint32_t i = 0; i < count; ++i) {
rt_command_buffer *cmdbuf = &_buffers[handles[i].index];
if (cmdbuf->version != handles[i].version) {
rtLog("dx11", "Tried to submit an invalid command buffer (version mismatch)");
return RT_INVALID_VALUE;
}
ID3D11CommandList *cmdlist;
if (FAILED(cmdbuf->context->FinishCommandList(FALSE, &cmdlist))) {
rtLog("dx11", "FinishCommandList failed");
return RT_UNKNOWN_ERROR;
}
rtLockMutex(g_gpu.context_lock);
g_gpu.device_context->ExecuteCommandList(cmdlist, FALSE);
rtUnlockMutex(g_gpu.context_lock);
rtLockMutex(_lock);
cmdbuf->next_free = _first_free;
_first_free = cmdbuf;
rtUnlockMutex(_lock);
}
return RT_SUCCESS;
}
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxCommandBuffers.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _buffers[handle.index].version)
return nullptr;
return &_buffers[handle.index];
}
extern "C" rt_result
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *,
rt_command_buffer_handle *p_command_buffers) {
return rtAllocCommandBuffers(count, p_command_buffers);
}
extern "C" rt_result
RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) {
return rtSubmitCommandBuffers(info->command_buffer_count, info->command_buffers);
}

View File

@ -1,167 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
#include "device_objects.hpp"
#include "gpu.hpp"
extern "C" void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdhandle,
const rt_cmd_begin_pass_info *info) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
if (cmd->annotation) {
WCHAR wname[128];
if (rtUTF8ToWStr(info->name, wname, sizeof(wname)) == RT_SUCCESS)
cmd->annotation->BeginEvent(wname);
}
// Setup rtvs
ID3D11RenderTargetView *rtvs[4];
ID3D11DepthStencilView *dsv = nullptr;
for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
if (!RT_VERIFY(rt))
return;
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
rtvs[i] = rt->rtv;
if (info->color_buffer_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
FLOAT color[4] = {
info->color_buffer_clear_values[i].color.r,
info->color_buffer_clear_values[i].color.g,
info->color_buffer_clear_values[i].color.b,
info->color_buffer_clear_values[i].color.a,
};
cmd->context->ClearRenderTargetView(rt->rtv, color);
}
}
rt_render_target *dsvrt = rtGetRenderTarget(info->depth_stencil_buffer);
if (dsvrt) {
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
"Need to provide a valid depth stencil render target");
dsv = dsvrt->dsv;
if (info->depth_stencil_buffer_load == RT_PASS_LOAD_MODE_CLEAR)
cmd->context->ClearDepthStencilView(
dsv,
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
: D3D11_CLEAR_DEPTH,
info->depth_stencil_buffer_clear_value.depth_stencil.depth,
static_cast<UINT8>(info->depth_stencil_buffer_clear_value.depth_stencil.stencil));
}
cmd->context->OMSetRenderTargets(static_cast<UINT>(info->color_buffer_count), rtvs, dsv);
D3D11_VIEWPORT viewport;
viewport.TopLeftX = static_cast<float>(info->render_area.offset.x);
viewport.TopLeftY = static_cast<float>(info->render_area.offset.y);
viewport.Width = static_cast<float>(info->render_area.size.x);
viewport.Height = static_cast<float>(info->render_area.size.y);
viewport.MinDepth = 0.f;
viewport.MaxDepth = 1.f;
cmd->context->RSSetViewports(1, &viewport);
// We currently only support triangles, so here is a good place to set this
cmd->context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
}
extern "C" void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdhandle) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
if (cmd->annotation) {
cmd->annotation->EndEvent();
}
}
extern "C" void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdhandle,
rt_render_target_handle target,
rt_render_target_state state) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
RT_UNUSED(target);
RT_UNUSED(state);
}
extern "C" void
RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdhandle,
rt_render_target_handle render_target) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
RT_UNUSED(render_target);
}
extern "C" void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
rt_pipeline_handle pipeline_handle) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
rt_pipeline *pipeline = rtGetPipeline(pipeline_handle);
if (pipeline->IsComputePipeline()) {
rtReportError("dx11",
"Attempted to bind a compute pipeline via CmdBindPipeline. Use "
"CmdBindComputePipeline instead.");
return;
}
auto context = cmd->context;
context->IASetInputLayout(pipeline->input_layout);
context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
context->VSSetShader(pipeline->vertex_shader, nullptr, 0);
context->PSSetShader(pipeline->pixel_shader, nullptr, 0);
context->RSSetState(pipeline->rasterizer_state);
}
extern "C" void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
uint32_t first_binding,
uint32_t count,
const rt_buffer_handle *buffers,
const uint32_t *_strides,
const uint32_t *_offsets) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return;
ID3D11Buffer **vbos = RT_ARENA_PUSH_ARRAY(temp.arena, ID3D11Buffer *, count);
static_assert(sizeof(UINT) == sizeof(uint32_t));
const UINT *offsets = _offsets;
const UINT *strides = _strides;
if (!vbos || !strides)
goto out;
if (!offsets) {
offsets = RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, UINT, count);
}
for (uint32_t i = 0; i < count; ++i) {
rt_buffer *buffer = rtGetBuffer(buffers[i]);
RT_ASSERT(buffer->type == RT_BUFFER_TYPE_VERTEX, "Buffer must be a vertex buffer");
vbos[i] = buffer->buffer;
}
cmd->context->IASetVertexBuffers(first_binding, count, vbos, strides, offsets);
out:
rtReturnTemporaryArena(temp);
}
extern "C" void
RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, uint32_t first, uint32_t count) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
if (!RT_VERIFY(cmd))
return;
cmd->context->Draw(count, first);
}

View File

@ -1,95 +0,0 @@
#ifndef RT_DX11_DEVICE_OBJECTS_HPP
#define RT_DX11_DEVICE_OBJECTS_HPP
// Types containing various api objects
#include <stdint.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include "runtime/runtime.h"
struct rt_render_target {
// Only one of these should be valid
ID3D11RenderTargetView *rtv;
ID3D11DepthStencilView *dsv;
ID3D11Texture2D *texture;
rt_pixel_format format;
uint32_t version;
rt_render_target *next_free;
RT_INLINE bool HasStencilComponent() const {
return format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8;
}
RT_INLINE bool IsColorRenderTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
return rtv != nullptr;
}
RT_INLINE bool IsDepthStencilTarget() const {
        RT_ASSERT(!(rtv != nullptr && dsv != nullptr),
                  "A render target should not contain both a render target view and a depth stencil view");
return dsv != nullptr;
}
};
struct rt_command_buffer {
// Only created once and then re-used.
ID3D11DeviceContext1 *context;
ID3DUserDefinedAnnotation *annotation;
uint32_t version;
rt_command_buffer *next_free;
};
struct rt_buffer {
ID3D11Buffer *buffer;
rt_buffer_type type;
rt_buffer_usage usage;
uint32_t version;
rt_buffer *next_free;
};
struct rt_pipeline {
ID3D11InputLayout *input_layout;
ID3D11VertexShader *vertex_shader;
ID3D11PixelShader *pixel_shader;
ID3D11ComputeShader *compute_shader;
ID3D11RasterizerState *rasterizer_state;
rt_pipeline *next_free;
uint32_t version;
RT_INLINE bool IsComputePipeline() const {
RT_ASSERT(!(compute_shader && (vertex_shader || pixel_shader)),
"A pipeline should contain either a compute shader or graphics shaders.");
return compute_shader != nullptr;
}
};
struct rt_render_target_create_info {
rt_pixel_format format;
uint32_t width;
uint32_t height;
const char *name;
};
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info);
void rtDestroyRenderTarget(rt_render_target_handle handle);
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles);
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles);
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle);
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);
#endif

View File

@ -1,43 +0,0 @@
#ifndef RT_DX11_GPU_HPP
#define RT_DX11_GPU_HPP
#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#define RT_DX11_MAX_FRAMES_IN_FLIGHT 2
// Smart pointer for COM-Objects
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;
struct rt_swap_chain {
ComPtr<IDXGISwapChain1> swap_chain;
ComPtr<ID3D11RenderTargetView> rtv;
};
struct rt_gpu {
ComPtr<ID3D11Device1> device;
ComPtr<ID3D11DeviceContext1> device_context;
ComPtr<IDXGIFactory2> dxgi_factory;
rt_swap_chain swap_chain;
rt_mutex *context_lock;
D3D_FEATURE_LEVEL feature_level;
D3D11_FEATURE_DATA_THREADING threading_support;
};
#ifndef DONT_DEFINE_GPU_GLOBAL
extern rt_gpu g_gpu;
#endif
DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format);
#endif

View File

@ -1,35 +0,0 @@
#include "gpu.hpp"
DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format) {
switch (format) {
case RT_PIXEL_FORMAT_INVALID:
return DXGI_FORMAT_UNKNOWN;
case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
return DXGI_FORMAT_B8G8R8A8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_R8G8B8_UNORM:
return DXGI_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8_UNORM:
return DXGI_FORMAT_B8G8R8X8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8_SRGB:
return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
case RT_PIXEL_FORMAT_B8G8R8_SRGB:
return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB;
case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
return DXGI_FORMAT_D24_UNORM_S8_UINT;
case RT_PIXEL_FORMAT_DEPTH32:
return DXGI_FORMAT_D32_FLOAT;
case RT_PIXEL_FORMAT_SWAPCHAIN:
return DXGI_FORMAT_B8G8R8A8_UNORM;
default:
return DXGI_FORMAT_UNKNOWN;
}
}

View File

@ -1,288 +0,0 @@
#ifndef _WIN32
#warning Building DX11 on non-Windows is probably a mistake
#endif
#include <d3d11.h>
#include <dxgi1_3.h>
#include <wrl.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#define DONT_DEFINE_GPU_GLOBAL
#include "gpu.hpp"
RT_CVAR_S(
rt_Dx11AdapterName,
"Name of the adapter that should be used for device creation. Default: \"\" (Use default)",
"");
RT_CVAR_I(rt_Dx11VSync, "Enable vsync. Default: 1", 1);
RT_CVAR_I(rt_Dx11MaxSubmittedCommandBuffers,
"Maximum number of submitted command buffers per frame. Default: 1024",
1024);
extern rt_cvar rt_Dx11MaxCommandBuffers;
rt_gpu g_gpu;
extern "C" void RT_RENDERER_API_FN(RegisterCVars)(void) {
rtRegisterCVAR(&rt_Dx11AdapterName);
rtRegisterCVAR(&rt_Dx11VSync);
rtRegisterCVAR(&rt_Dx11MaxCommandBuffers);
}
static rt_swap_chain CreateSwapChain(HWND hwnd) {
rt_swap_chain swc;
DXGI_SWAP_CHAIN_DESC1 desc;
desc.Width = 0; // use window width
desc.Height = 0; // use window height
desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; // can't specify _SRGB here when using
// DXGI_SWAP_EFFECT_FLIP_* ...;
desc.Stereo = FALSE;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.BufferCount = 2;
desc.Scaling = DXGI_SCALING_STRETCH;
desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
desc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
desc.Flags = 0;
if (FAILED(g_gpu.dxgi_factory->CreateSwapChainForHwnd(g_gpu.device.Get(),
hwnd,
&desc,
nullptr,
nullptr,
&swc.swap_chain))) {
rtReportError("dx11", "Failed to create the swap chain.");
return swc;
}
ID3D11Texture2D *frame_buffer;
if (FAILED(swc.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
rtReportError("dx11", "Failed to retrieve the backbuffer.");
swc.swap_chain.Reset();
return swc;
}
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
rtv_desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM_SRGB;
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
if (FAILED(g_gpu.device->CreateRenderTargetView(frame_buffer, &rtv_desc, &swc.rtv))) {
rtReportError("dx11", "Failed to create the render target view for the backbuffer.");
swc.swap_chain.Reset();
return swc;
}
return swc;
}
static IDXGIAdapter *RetrieveSelectedAdapter(void) {
ComPtr<IDXGIFactory2> factory;
if (FAILED(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)))) {
return NULL;
}
UINT i = 0;
IDXGIAdapter *adapter;
while (factory->EnumAdapters(i, &adapter) == S_OK) {
++i;
DXGI_ADAPTER_DESC desc;
adapter->GetDesc(&desc);
char utf8_desc[256];
rtWStrToUTF8(desc.Description, utf8_desc, 256);
        if (strncmp(utf8_desc, rt_Dx11AdapterName.s, 256) == 0)
            return adapter;
        adapter->Release();
    }
return NULL;
}
extern rt_result InitCommandBufferManagement();
extern void ShutdownCommandBufferManagement();
extern rt_result InitRenderTargetManagement();
extern void ShutdownRenderTargetManagement();
extern rt_result InitBufferManagement();
extern void ShutdownBufferManagement();
extern rt_result InitPipelineManagement();
extern void ShutdownPipelineManagement();
extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0};
UINT device_flags = 0;
#ifdef RT_DEBUG
device_flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
IDXGIAdapter *selected_adapter = RetrieveSelectedAdapter();
ID3D11Device *base_device;
ID3D11DeviceContext *base_context;
if (FAILED(D3D11CreateDevice(selected_adapter,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
device_flags,
feature_levels,
RT_ARRAY_COUNT(feature_levels),
D3D11_SDK_VERSION,
&base_device,
&g_gpu.feature_level,
&base_context))) {
rtLog("dx11", "Feature level 11.1 creation failed, retrying with feature level 11.0");
if (FAILED(D3D11CreateDevice(selected_adapter,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
device_flags,
&feature_levels[1],
RT_ARRAY_COUNT(feature_levels) - 1,
D3D11_SDK_VERSION,
&base_device,
&g_gpu.feature_level,
&base_context))) {
rtReportError("dx11", "Failed to create the d3d11 device.");
return RT_UNKNOWN_ERROR;
}
}
if (FAILED(base_device->QueryInterface(IID_PPV_ARGS(&g_gpu.device)))) {
rtReportError("dx11", "Failed to query the D3D11Device1 interface.");
return RT_UNKNOWN_ERROR;
}
if (FAILED(base_context->QueryInterface(IID_PPV_ARGS(&g_gpu.device_context)))) {
rtReportError("dx11", "Failed to query the D3D11DeviceContext1 interface.");
return RT_UNKNOWN_ERROR;
}
IDXGIDevice1 *dxgi_device;
if (FAILED(g_gpu.device->QueryInterface(&dxgi_device))) {
rtReportError("dx11", "Failed to query the DXGIDevice1 interface.");
return RT_UNKNOWN_ERROR;
}
IDXGIAdapter *adapter;
if (FAILED(dxgi_device->GetAdapter(&adapter))) {
rtReportError("dx11", "Failed to retrieve the dxgi adapter.");
return RT_UNKNOWN_ERROR;
}
if (FAILED(adapter->GetParent(IID_PPV_ARGS(&g_gpu.dxgi_factory)))) {
rtReportError("dx11", "Failed to retrieve the dxgi factory.");
return RT_UNKNOWN_ERROR;
}
g_gpu.device->CheckFeatureSupport(D3D11_FEATURE_THREADING,
&g_gpu.threading_support,
sizeof(g_gpu.threading_support));
g_gpu.swap_chain = CreateSwapChain(info->hWnd);
g_gpu.context_lock = rtCreateMutex();
rt_result res = InitCommandBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitPipelineManagement();
if (res != RT_SUCCESS)
return res;
return RT_SUCCESS;
}
extern "C" void RT_RENDERER_API_FN(Shutdown)(void) {
ShutdownPipelineManagement();
ShutdownBufferManagement();
ShutdownRenderTargetManagement();
ShutdownCommandBufferManagement();
rtDestroyMutex(g_gpu.context_lock);
g_gpu.swap_chain.rtv.Reset();
g_gpu.swap_chain.swap_chain.Reset();
g_gpu.dxgi_factory.Reset();
g_gpu.device.Reset();
}
extern "C" unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return RT_DX11_MAX_FRAMES_IN_FLIGHT;
}
extern "C" void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
FLOAT clear_color[4] = {
0,
0,
0,
0,
};
rtLockMutex(g_gpu.context_lock);
g_gpu.device_context->ClearRenderTargetView(g_gpu.swap_chain.rtv.Get(), clear_color);
rtUnlockMutex(g_gpu.context_lock);
}
extern "C" void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
rtLockMutex(g_gpu.context_lock);
UINT sync_interval = rt_Dx11VSync.i ? 1 : 0;
g_gpu.swap_chain.swap_chain->Present(sync_interval, 0);
rtUnlockMutex(g_gpu.context_lock);
}
// Copied from null. Delete once no longer needed
extern "C" {
#define RETURN_HANDLE_STUB2(type, initial) \
static unsigned int s_next = (initial); \
s_next = (s_next + 1) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
type h = { \
1, \
s_next, \
}; \
return h;
#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)
#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial) \
static unsigned int s_next = (initial); \
for (uint32_t i = 0; i < (count); ++i) { \
(out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
(out)[i].version = 1; \
}
#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
const rt_gpu_semaphore_info *info,
rt_gpu_semaphore_handle *p_semaphores) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB2(p_semaphores, count, 3)
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
RT_UNUSED(count);
RT_UNUSED(semaphores);
}
/* NOTE(Kevin): It might become necessary to actually track the value, to correctly simulate gpu
* behaviour */
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle sem) {
RT_UNUSED(sem);
return 0;
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
return {1, 1};
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
return {1, 2};
}
}

View File

@ -1,31 +0,0 @@
if get_option('build_dx11')
dx11_dep = declare_dependency(link_args: ['-ld3d11', '-ldxgi', '-lwinmm', '-ldxguid'])
dx11_renderer_lib = library('rtdx11',
# Project Sources
'device_objects.hpp',
'gpu.hpp',
'../common/common_render_graph.h',
'buffers.cpp',
'commands.cpp',
'command_buffers.cpp',
'helpers.cpp',
'init.cpp',
'pipelines.cpp',
'render_graph.cpp',
'render_targets.cpp',
'../common/common_render_graph.c',
dependencies : [m_dep, windowing_dep, dx11_dep],
include_directories : [engine_incdir, contrib_incdir],
link_with : [runtime_lib],
cpp_pch : 'pch/dx11_pch.h',
override_options : ['b_sanitize=none'],
install : true)
engine_libs += dx11_renderer_lib
engine_lib_paths += dx11_renderer_lib.full_path()
endif

View File

@ -1,5 +0,0 @@
// DX11 headers
#include <wrl.h>
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi1_3.h>

View File

@ -1,238 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/effect.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxPipelines,
"Maximum number of simultaneously existing pipelines. Default: 128",
128);
static rt_pipeline *_pipelines;
static rt_pipeline *_first_free;
static rt_mutex *_lock;
rt_result InitPipelineManagement() {
_pipelines =
reinterpret_cast<rt_pipeline *>(calloc((size_t)rt_Dx11MaxPipelines.i, sizeof(rt_pipeline)));
if (!_pipelines)
return RT_OUT_OF_MEMORY;
_first_free = _pipelines + 1;
for (int i = 0; i < rt_Dx11MaxPipelines.i - 1; ++i)
_pipelines[i].next_free = &_pipelines[i + 1];
_lock = rtCreateMutex();
if (!_lock) {
free(_pipelines);
return RT_UNKNOWN_ERROR;
}
return RT_SUCCESS;
}
void ShutdownPipelineManagement() {
for (int i = 0; i < rt_Dx11MaxPipelines.i; ++i) {
if (_pipelines[i].compute_shader)
_pipelines[i].compute_shader->Release();
if (_pipelines[i].vertex_shader)
_pipelines[i].vertex_shader->Release();
if (_pipelines[i].pixel_shader)
_pipelines[i].pixel_shader->Release();
if (_pipelines[i].input_layout)
_pipelines[i].input_layout->Release();
}
free(_pipelines);
rtDestroyMutex(_lock);
}
rt_result GetShader(rt_resource_id id, rt_shader_info **p_shader, rt_arena *arena) {
size_t shader_size = rtGetResourceSize(id);
if (shader_size == 0)
return RT_INVALID_VALUE;
void *buffer = rtArenaPush(arena, shader_size);
if (!buffer)
return RT_OUT_OF_MEMORY;
rt_result res = rtGetResource(id, buffer);
if (res != RT_SUCCESS) {
rtArenaPop(arena, shader_size);
return res;
}
rt_resource *resource = reinterpret_cast<rt_resource *>(buffer);
RT_ASSERT(resource->type == RT_RESOURCE_SHADER, "Expected a shader");
*p_shader = reinterpret_cast<rt_shader_info *>(resource->data);
return RT_SUCCESS;
}
extern "C" rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
rt_pipeline *slot = nullptr;
{
auto lg = rtAutoLock(_lock);
slot = _first_free;
if (slot)
_first_free = slot->next_free;
}
if (!slot) {
rtLog("dx11", "Could not create pipeline, because no slots are available.");
return RT_INVALID_HANDLE;
}
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (info->vertex_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->vertex_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve vertex shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreateVertexShader(bytecode,
vs->bytecode_length,
NULL,
&slot->vertex_shader))) {
rtReportError("dx11", "Vertex shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
// TODO: effects should specify the expected vertex layout
// For now, we use a default
/* clang-format off */
D3D11_INPUT_ELEMENT_DESC default_layout[] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0},
};
/* clang-format on */
if (FAILED(g_gpu.device->CreateInputLayout(default_layout,
RT_ARRAY_COUNT(default_layout),
bytecode,
vs->bytecode_length,
&slot->input_layout))) {
rtReportError("dx11", "Failed to create the vertex layout.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
if (info->fragment_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->fragment_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve fragment shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreatePixelShader(bytecode,
vs->bytecode_length,
NULL,
&slot->pixel_shader))) {
rtReportError("dx11", "Fragment shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
if (info->compute_shader != RT_INVALID_RESOURCE_ID) {
rt_shader_info *vs;
if (GetShader(info->compute_shader, &vs, temp.arena) != RT_SUCCESS) {
rtReportError("dx11", "Could not retrieve compute shader data.");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
void *bytecode = rtResolveRelptr(&vs->bytecode);
if (FAILED(g_gpu.device->CreateComputeShader(bytecode,
vs->bytecode_length,
NULL,
&slot->compute_shader))) {
rtReportError("dx11", "Compute shader creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
}
    // TODO: Effects should specify the rasterizer state
// DX11 only supports up to 4096 rasterizer state objects.
// We could cache these and only create the distinct objects.
D3D11_RASTERIZER_DESC rasterizer_desc;
rasterizer_desc.FillMode = D3D11_FILL_SOLID;
rasterizer_desc.CullMode = D3D11_CULL_NONE;
rasterizer_desc.FrontCounterClockwise = TRUE;
rasterizer_desc.DepthBias = 0;
rasterizer_desc.DepthBiasClamp = 0.f;
rasterizer_desc.SlopeScaledDepthBias = 0.f;
rasterizer_desc.DepthClipEnable = TRUE;
rasterizer_desc.ScissorEnable = FALSE;
rasterizer_desc.MultisampleEnable = TRUE;
rasterizer_desc.AntialiasedLineEnable = TRUE;
if (FAILED(g_gpu.device->CreateRasterizerState(&rasterizer_desc, &slot->rasterizer_state))) {
rtReportError("dx11", "Rasterizer state creation failed");
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _pipelines);
return {slot->version, index};
}
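/* Possible shape of the rasterizer-state cache mentioned in the TODO above (a hedged sketch,
 * not part of the engine): D3D11 caps the number of live rasterizer-state objects at 4096,
 * so distinct descriptors could be deduplicated, e.g.
 *
 *   struct rt_rs_cache_entry { uint64_t key; ID3D11RasterizerState *state; };
 *   // key: a byte-wise hash over a fully initialized D3D11_RASTERIZER_DESC (mind padding)
 *   // lookup: probe a small fixed-size table; on a miss, CreateRasterizerState and insert
 *
 * rt_rs_cache_entry and the hashing scheme are assumptions for illustration only. */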
extern "C" void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
return;
auto lg = rtAutoLock(_lock);
if (handle.version != _pipelines[handle.index].version)
return;
if (_pipelines[handle.index].compute_shader)
_pipelines[handle.index].compute_shader->Release();
if (_pipelines[handle.index].vertex_shader)
_pipelines[handle.index].vertex_shader->Release();
if (_pipelines[handle.index].pixel_shader)
_pipelines[handle.index].pixel_shader->Release();
if (_pipelines[handle.index].input_layout)
_pipelines[handle.index].input_layout->Release();
_pipelines[handle.index].next_free = _first_free;
_pipelines[handle.index].version =
(_pipelines[handle.index].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
_first_free = &_pipelines[handle.index];
}
rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxPipelines.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (handle.version != _pipelines[handle.index].version)
return nullptr;
return &_pipelines[handle.index];
}

View File

@ -1,229 +0,0 @@
#include "gfx/render_view.h"
#include "gfx/renderer_api.h"
#include "renderer/common/common_render_graph.h"
#include "device_objects.hpp"
#include "gpu.hpp"
static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;
struct rt_pass_runtime_data {
rt_render_view views[RT_DX11_MAX_FRAMES_IN_FLIGHT][MAX_SUBMITTED_VIEWS_PER_PASS];
uint32_t view_count[RT_DX11_MAX_FRAMES_IN_FLIGHT];
unsigned int views_frame_id[RT_DX11_MAX_FRAMES_IN_FLIGHT];
};
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
return rtCreateRenderTarget({.format = rtinfo->format,
.width = rtinfo->width,
.height = rtinfo->height,
.name = rtinfo->name});
}
static int RequireExplicitSynchronization() {
return 0;
}
static size_t GetRuntimeDataSize() {
return sizeof(rt_pass_runtime_data);
}
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs{};
cbs.CreateRenderTarget = CreateRenderTarget;
cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
cbs.GetRuntimeDataSize = GetRuntimeDataSize;
return rtCreateRenderGraphBuilder(&cbs);
}
extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id) {
for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
if (render_graph->passes[i].id == pass_id) {
rt_render_pass *pass = &render_graph->passes[i];
rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
RT_ASSERT(runtime_data->views_frame_id[frame_slot] == frame_id ||
runtime_data->views_frame_id[frame_slot] == 0,
"Tried to submit a view for a not-current frame.");
if (!RT_VERIFY(runtime_data->view_count[frame_slot] < MAX_SUBMITTED_VIEWS_PER_PASS))
return;
runtime_data->views[frame_slot][runtime_data->view_count[frame_slot]++] = view;
runtime_data->views_frame_id[frame_slot] = frame_id;
}
}
}
extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph,
unsigned int frame_id) {
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
for (uint32_t i = 0; i < graph->pass_count; ++i) {
rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
#ifdef RT_DEBUG
memset(runtime_data->views[frame_slot], 0, sizeof(runtime_data->views[frame_slot]));
#endif
runtime_data->view_count[frame_slot] = 0;
runtime_data->views_frame_id[frame_slot] = 0;
}
}
static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle, unsigned int frame_id) {
rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
if (!RT_VERIFY(cmd))
return RT_INVALID_VALUE;
if (cmd->annotation) {
WCHAR wname[128];
if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS)
cmd->annotation->BeginEvent(wname);
}
// Setup rtvs
ID3D11RenderTargetView *rtvs[4];
ID3D11DepthStencilView *dsv = nullptr;
for (uint32_t i = 0; i < pass->color_output_count; ++i) {
rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
if (!RT_VERIFY(rt))
return RT_INVALID_VALUE;
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
rtvs[i] = rt->rtv;
if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
FLOAT color[4] = {
pass->color_clear_values[i].r,
pass->color_clear_values[i].g,
pass->color_clear_values[i].b,
pass->color_clear_values[i].a,
};
cmd->context->ClearRenderTargetView(rt->rtv, color);
}
}
rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
if (dsvrt) {
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
"Need to provide a valid depth stencil render target");
dsv = dsvrt->dsv;
if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
cmd->context->ClearDepthStencilView(
dsv,
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
: D3D11_CLEAR_DEPTH,
pass->depth_stencil_clear_value.depth,
static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
}
cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
D3D11_VIEWPORT viewport;
viewport.TopLeftX = pass->render_area.offset.x;
viewport.TopLeftY = pass->render_area.offset.y;
viewport.Width = pass->render_area.size.x;
viewport.Height = pass->render_area.size.y;
viewport.MinDepth = pass->min_depth;
viewport.MaxDepth = pass->max_depth;
if (viewport.Width == 0 || viewport.Height == 0) {
DXGI_SWAP_CHAIN_DESC desc;
g_gpu.swap_chain.swap_chain->GetDesc(&desc);
if (viewport.Width == 0)
viewport.Width = static_cast<float>(desc.BufferDesc.Width);
if (viewport.Height == 0)
viewport.Height = static_cast<float>(desc.BufferDesc.Height);
}
cmd->context->RSSetViewports(1, &viewport);
auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
RT_VERIFY(runtime_data);
unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
rt_result res = RT_VERIFY(pass->Execute)(pass->id,
cmdbuf_handle,
runtime_data->views[frame_slot],
runtime_data->view_count[frame_slot],
pass->user_data);
if (cmd->annotation) {
cmd->annotation->EndEvent();
}
return res;
}
static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
DXGI_SWAP_CHAIN_DESC scd;
g_gpu.swap_chain.swap_chain->GetDesc(&scd);
D3D11_TEXTURE2D_DESC td;
backbuffer->texture->GetDesc(&td);
    // This is stricter than necessary: CopyResource also works when both formats belong to the
    // same typeless format group (see the sketch after this function).
return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
}
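// Sketch (not part of the original code) of how the check above could be relaxed: CopyResource
// only requires both textures to be in the same typeless format group, so UNORM and UNORM_SRGB
// siblings could be treated as copy-compatible. ToTypelessGroup is a hypothetical helper.
static DXGI_FORMAT ToTypelessGroup(DXGI_FORMAT format) {
    switch (format) {
    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
        return DXGI_FORMAT_B8G8R8A8_TYPELESS;
    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
        return DXGI_FORMAT_R8G8B8A8_TYPELESS;
    default:
        return format;
    }
}
// Comparing ToTypelessGroup(scd.BufferDesc.Format) == ToTypelessGroup(td.Format) would then allow
// the CopyResource path for matching sRGB / non-sRGB swapchain formats as well.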
extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph, unsigned int frame_id) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
// Alloc a command buffer for every pass
rt_command_buffer_handle *cmdbufs =
RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
if (res != RT_SUCCESS) {
rtReturnTemporaryArena(temp);
return res;
}
for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
rt_render_pass *pass = &render_graph->passes[i];
res = ExecutePass(pass, cmdbufs[i], frame_id);
if (res != RT_SUCCESS)
break;
}
if (res == RT_SUCCESS) {
res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
}
// Copy backbuffer to swapchain
rt_render_target *backbuffer =
rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
if (!backbuffer) {
rtReturnTemporaryArena(temp);
return RT_INVALID_VALUE;
}
ID3D11Texture2D *frame_buffer;
if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
rtReportError("dx11", "Failed to retrieve the backbuffer.");
rtReturnTemporaryArena(temp);
return RT_UNKNOWN_ERROR;
}
if (IsCopyResourcePossible(backbuffer)) {
g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
} else {
// NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
// that implements a blit.
// Another idea would be a compute shader that does a copy&filter but that requires more
// work
RT_NOT_IMPLEMENTED;
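        // A hypothetical sketch of the fullscreen-triangle blit mentioned above; the shader
        // objects and SRV named here (fullscreen_tri_vs, blit_ps, backbuffer_srv) do not exist
        // in this codebase:
        //   g_gpu.device_context->OMSetRenderTargets(1, &swapchain_rtv, nullptr);
        //   g_gpu.device_context->VSSetShader(fullscreen_tri_vs, nullptr, 0); // tri from SV_VertexID
        //   g_gpu.device_context->PSSetShader(blit_ps, nullptr, 0);           // samples the source
        //   g_gpu.device_context->PSSetShaderResources(0, 1, &backbuffer_srv);
        //   g_gpu.device_context->Draw(3, 0);                                 // no vertex buffer needed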
}
rtReturnTemporaryArena(temp);
return res;
}

View File

@@ -1,182 +0,0 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading_helpers.hpp"
#include "device_objects.hpp"
#include "gpu.hpp"
RT_CVAR_I(rt_Dx11MaxRenderTargets,
"Maximum number of simultaneously existing render targets. Default: 128",
128);
static rt_render_target *_render_targets;
static rt_render_target *_first_free;
static rt_mutex *_lock;
rt_result InitRenderTargetManagement() {
_render_targets = reinterpret_cast<rt_render_target *>(
calloc((size_t)rt_Dx11MaxRenderTargets.i, sizeof(rt_render_target)));
if (!_render_targets) {
return RT_OUT_OF_MEMORY;
}
_lock = rtCreateMutex();
if (!_lock) {
free(_render_targets);
return RT_UNKNOWN_ERROR;
}
_render_targets[1].rtv = g_gpu.swap_chain.rtv.Get();
_render_targets[1].format = RT_PIXEL_FORMAT_B8G8R8A8_SRGB;
_render_targets[1].version = 1;
_first_free = _render_targets + 2;
    for (int i = 2; i < rt_Dx11MaxRenderTargets.i - 1; ++i) {
        _render_targets[i].next_free = &_render_targets[i + 1];
    }
    _render_targets[rt_Dx11MaxRenderTargets.i - 1].next_free = nullptr;
return RT_SUCCESS;
}
void ShutdownRenderTargetManagement() {
// Swapchain rtv in slot 1 will be released elsewhere
for (int i = 2; i < rt_Dx11MaxRenderTargets.i; ++i) {
if (_render_targets[i].rtv)
_render_targets[i].rtv->Release();
if (_render_targets[i].dsv)
_render_targets[i].dsv->Release();
if (_render_targets[i].texture)
_render_targets[i].texture->Release();
}
free(_render_targets);
rtDestroyMutex(_lock);
}
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info) {
rt_render_target *slot = nullptr;
    {
        auto lock_guard = rtAutoLock(_lock);
        slot = _first_free;
        if (slot)
            _first_free = slot->next_free;
    }
if (!slot) {
rtLog("dx11",
"Could not create a new render target, because all available slots are currently in "
"use.");
return RT_INVALID_HANDLE;
}
slot->format = info.format;
uint32_t swapchain_width = 0, swapchain_height = 0;
if (info.width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
info.height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
DXGI_SWAP_CHAIN_DESC desc;
g_gpu.swap_chain.swap_chain->GetDesc(&desc);
swapchain_width = desc.BufferDesc.Width;
swapchain_height = desc.BufferDesc.Height;
}
if (!rtIsDepthFormat(info.format)) {
D3D11_TEXTURE2D_DESC tex_desc = {};
tex_desc.Width =
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
tex_desc.Height =
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
tex_desc.MipLevels = 1;
tex_desc.ArraySize = 1;
tex_desc.Format = rtConvertPixelFormat(info.format);
tex_desc.SampleDesc.Count = 1;
tex_desc.SampleDesc.Quality = 0;
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
tex_desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
tex_desc.CPUAccessFlags = 0; // none
tex_desc.MiscFlags = 0;
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
rtv_desc.Format = rtConvertPixelFormat(info.format);
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
rtv_desc.Texture2D.MipSlice = 0;
if (FAILED(g_gpu.device->CreateRenderTargetView(slot->texture, &rtv_desc, &slot->rtv))) {
slot->texture->Release();
rtLog("dx11",
"Failed to create the render target view for render target %s",
info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _render_targets);
return {.version = slot->version, .index = index};
} else {
D3D11_TEXTURE2D_DESC tex_desc = {};
tex_desc.Width =
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
tex_desc.Height =
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
tex_desc.MipLevels = 1;
tex_desc.ArraySize = 1;
tex_desc.Format = rtConvertPixelFormat(info.format);
tex_desc.SampleDesc.Count = 1;
tex_desc.SampleDesc.Quality = 0;
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
tex_desc.CPUAccessFlags = 0; // none
tex_desc.MiscFlags = 0;
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
dsv_desc.Format = rtConvertPixelFormat(info.format);
dsv_desc.Flags = 0;
dsv_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
dsv_desc.Texture2D.MipSlice = 0;
if (FAILED(g_gpu.device->CreateDepthStencilView(slot->texture, &dsv_desc, &slot->dsv))) {
slot->texture->Release();
rtLog("dx11",
"Failed to create the depth stencil view for render target %s",
info.name);
auto lg = rtAutoLock(_lock);
slot->next_free = _first_free;
_first_free = slot;
return RT_INVALID_HANDLE;
}
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
uint32_t index = static_cast<uint32_t>(slot - _render_targets);
return {.version = slot->version, .index = index};
}
}
void rtDestroyRenderTarget(rt_render_target_handle handle) {
RT_UNUSED(handle);
}
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
if (!RT_IS_HANDLE_VALID(handle) || (int)handle.index >= rt_Dx11MaxRenderTargets.i)
return nullptr;
auto lg = rtAutoLock(_lock);
if (_render_targets[handle.index].version != handle.version)
return nullptr;
return &_render_targets[handle.index];
}

View File

@@ -1,10 +0,0 @@
null_renderer_lib = library('rtnull',
'null.c',
'../common/common_render_graph.c',
include_directories : engine_incdir,
link_with : runtime_lib,
install : true)
engine_libs += null_renderer_lib
engine_lib_paths += null_renderer_lib.full_path()

View File

@@ -1,159 +0,0 @@
/* "Null" renderer implementation.
* Useful for headless testing */
#include "gfx/renderer_api.h"
#include "runtime/runtime.h"
#include "../common/common_render_graph.h"
#define RETURN_HANDLE_STUB2(type, initial) \
static unsigned int s_next = (initial); \
return (type) { .index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX, .version = 1 }
#define RETURN_HANDLE_STUB(type) RETURN_HANDLE_STUB2(type, 1)
#define RETURN_HANDLE_ARRAY_STUB2(out, count, initial) \
static unsigned int s_next = (initial); \
for (uint32_t i = 0; i < (count); ++i) { \
(out)[i].index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX; \
(out)[i].version = 1; \
}
#define RETURN_HANDLE_ARRAY_STUB(out, count) RETURN_HANDLE_ARRAY_STUB2(out, count, 1)
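/* Illustration (not part of the original code): a function that uses RETURN_HANDLE_STUB hands out
 * {index = 1, version = 1} on its first call, {index = 2, version = 1} on the second, and so on,
 * with the index wrapping modulo RT_RENDER_BACKEND_HANDLE_MAX_INDEX. */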
void RT_RENDERER_API_FN(RegisterCVars)(void) {
}
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
RT_UNUSED(info);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(Shutdown)(void) {
}
unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return 2;
}
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
}
void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
RT_UNUSED(frame_id);
}
rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
RT_UNUSED(info);
RETURN_HANDLE_STUB(rt_pipeline_handle);
}
void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
RT_UNUSED(handle);
}
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB(p_command_buffers, count)
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
RT_UNUSED(queue);
RT_UNUSED(info);
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
RT_UNUSED(info);
RETURN_HANDLE_ARRAY_STUB(p_buffers, count);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
RT_UNUSED(count);
RT_UNUSED(buffers);
}
void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmd,
const rt_cmd_begin_pass_info *info) {
RT_UNUSED(cmd);
RT_UNUSED(info);
}
void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmd) {
RT_UNUSED(cmd);
}
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmd,
rt_render_target_handle target,
rt_render_target_state state) {
RT_UNUSED(cmd);
RT_UNUSED(target);
RT_UNUSED(state);
}
void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target) {
RT_UNUSED(cmdbuf_handle);
RT_UNUSED(render_target);
}
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *info) {
    RT_UNUSED(info);
    RETURN_HANDLE_STUB(rt_render_target_handle);
}
static int RequireExplicitSync(void) {
return 0;
}
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = CreateRenderTarget,
.RequireExplicitSynchronization =
RequireExplicitSync};
return rtCreateRenderGraphBuilder(&cbs);
}
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
RT_UNUSED(render_graph);
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id,
rt_render_view view,
unsigned int frame_id) {
RT_UNUSED(render_graph);
RT_UNUSED(pass_id);
RT_UNUSED(view);
RT_UNUSED(frame_id);
}
void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
RT_UNUSED(graph);
}
void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
                                         rt_pipeline_handle pipeline_handle) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(pipeline_handle);
}
void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
                                              uint32_t first_binding,
                                              uint32_t count,
                                              const rt_buffer_handle *buffers,
                                              const uint64_t *_offsets) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first_binding);
    RT_UNUSED(count);
    RT_UNUSED(buffers);
    RT_UNUSED(_offsets);
}
void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle,
                                 uint32_t first,
                                 uint32_t count) {
    RT_UNUSED(cmdhandle);
    RT_UNUSED(first);
    RT_UNUSED(count);
}

View File

@@ -1,219 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "transfers.h"
#include "resources.h"
#include "gfx/renderer_api.h"
#include "runtime/config.h"
#include "runtime/threading.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_I(rt_VkMaxBufferCount, "Number of slots for gpu buffers. Default: 1024.", 1024);
typedef struct rt_buffer_data_s {
rt_buffer data;
uint32_t version;
struct rt_buffer_data_s *next_free;
} rt_buffer_data;
static rt_buffer_data *_buffers;
static rt_buffer_data *_first_free;
static rt_mutex *_list_lock;
rt_result InitBufferManagement(void) {
size_t n = (size_t)rt_VkMaxBufferCount.i;
_buffers = calloc(n, sizeof(rt_buffer_data));
if (!_buffers)
return RT_OUT_OF_MEMORY;
_first_free = &_buffers[1];
for (size_t i = 1; i < n - 1; ++i)
_buffers[i].next_free = &_buffers[i + 1];
_list_lock = rtCreateMutex();
return RT_SUCCESS;
}
void ShutdownBufferManagement(void) {
for (int i = 0; i < rt_VkMaxBufferCount.i; ++i) {
if (_buffers[i].data.buffer == VK_NULL_HANDLE)
continue;
vmaDestroyBuffer(g_gpu.allocator, _buffers[i].data.buffer, _buffers[i].data.allocation);
rtDestroyRWLock(&_buffers[i].data.lock);
memset(&_buffers[i], 0, sizeof(_buffers[i]));
}
free(_buffers);
_first_free = NULL;
rtDestroyMutex(_list_lock);
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers);
static void UploadViaMap(rt_buffer_data *buffer, const void *data, size_t size) {
rtLockWrite(&buffer->data.lock);
void *dev_mem = NULL;
if (vmaMapMemory(g_gpu.allocator, buffer->data.allocation, &dev_mem) != VK_SUCCESS) {
rtReportError("vk", "Unable to map buffer for upload");
rtUnlockWrite(&buffer->data.lock);
return;
}
memcpy(dev_mem, data, size);
vmaUnmapMemory(g_gpu.allocator, buffer->data.allocation);
if (!buffer->data.coherent)
vmaFlushAllocation(g_gpu.allocator, buffer->data.allocation, 0, VK_WHOLE_SIZE);
rtUnlockWrite(&buffer->data.lock);
}
/* Convenience function that decides between mapping or uploading via transfer buffer */
static void UploadData(rt_buffer_data *buffer, const void *data, size_t size) {
if (buffer->data.mappable)
UploadViaMap(buffer, data, size);
else
rtUploadToBuffer(buffer->data.buffer,
buffer->data.allocation,
buffer->data.owner,
data,
size);
}
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
const rt_buffer_info *info,
rt_buffer_handle *p_buffers) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_list_lock);
rt_buffer_data *slot = _first_free;
if (!slot) {
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_OUT_OF_MEMORY;
}
_first_free = slot->next_free;
rtUnlockMutex(_list_lock);
VkBufferUsageFlags buffer_usage = 0;
switch (info->type) {
case RT_BUFFER_TYPE_VERTEX:
buffer_usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_INDEX:
buffer_usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_STORAGE:
buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
break;
case RT_BUFFER_TYPE_UNIFORM:
buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
break;
}
buffer_usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
VkBufferCreateInfo buffer_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = info->size,
.usage = buffer_usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VmaMemoryUsage alloc_usage = 0;
VmaAllocationCreateFlags alloc_flags = 0;
switch (info->usage) {
case RT_BUFFER_USAGE_STATIC:
alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
alloc_flags = 0;
break;
case RT_BUFFER_USAGE_DYNAMIC:
alloc_usage = VMA_MEMORY_USAGE_AUTO;
alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
break;
case RT_BUFFER_USAGE_TRANSIENT:
alloc_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
alloc_flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
break;
}
VmaAllocationCreateInfo alloc_info = {.usage = alloc_usage, .flags = alloc_flags};
VkResult res = vmaCreateBuffer(g_gpu.allocator,
&buffer_info,
&alloc_info,
&slot->data.buffer,
&slot->data.allocation,
NULL);
if (res != VK_SUCCESS) {
rtReportError("vk", "Failed to create a buffer: %u", res);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_UNKNOWN_ERROR;
}
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok) {
rtReportError("vk", "Failed to create lock for buffer.");
vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
if (i > 0)
rtRenDestroyBuffers(i, p_buffers);
return RT_UNKNOWN_ERROR;
}
VkMemoryPropertyFlags properties;
vmaGetAllocationMemoryProperties(g_gpu.allocator, slot->data.allocation, &properties);
slot->data.mappable = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
slot->data.coherent = (properties & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
slot->data.owner = RT_VK_UNOWNED;
slot->data.state = RT_BUFFER_STATE_NOT_USED;
if (info->data)
UploadData(slot, info->data, info->size);
ptrdiff_t index = slot - _buffers;
p_buffers[i].index = (uint32_t)index;
p_buffers[i].version = slot->version;
}
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t count, rt_buffer_handle *buffers) {
for (uint32_t i = 0; i < count; ++i) {
if (buffers[i].index >= (uint32_t)rt_VkMaxBufferCount.i)
continue;
rt_buffer_data *slot = &_buffers[buffers[i].index];
if (slot->version != buffers[i].version) {
rtLog("vk", "Tried to destroy a buffer with an invalid handle (version mismatch).");
continue;
}
rtLockWrite(&slot->data.lock);
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
vmaDestroyBuffer(g_gpu.allocator, slot->data.buffer, slot->data.allocation);
slot->data.buffer = VK_NULL_HANDLE;
slot->data.allocation = VK_NULL_HANDLE;
rtUnlockWrite(&slot->data.lock);
rtDestroyRWLock(&slot->data.lock);
rtLockMutex(_list_lock);
slot->next_free = _first_free;
_first_free = slot;
rtUnlockMutex(_list_lock);
}
}
rt_buffer *rtGetBuffer(rt_buffer_handle handle) {
if (handle.index >= (uint32_t)rt_VkMaxBufferCount.i)
return NULL;
rt_buffer_data *slot = &_buffers[handle.index];
if (slot->version != handle.version) {
rtLog("vk", "Tried to access a buffer with an invalid handle (version mismatch).");
return NULL;
}
return &slot->data;
}

View File

@@ -1,490 +0,0 @@
#include "gpu.h"
#include "gpu_sync.h"
#include "swapchain.h"
#include "runtime/atomics.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "gfx/renderer_api.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxCommandPools,
"Maximum number of command pools that can be created. Default: 32",
32);
RT_CVAR_I(
rt_VkCommandBufferRingBufferSize,
"Size of the ring buffer used to store command buffers. Must be a power of two! Default: 512",
512);
typedef struct {
VkCommandPool pools[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT * 3];
uint32_t distinct_pool_count;
VkCommandPool *compute_pools;
VkCommandPool *graphics_pools;
VkCommandPool *transfer_pools;
} rt_thread_pools;
typedef struct {
VkCommandBuffer command_buffer;
uint32_t version;
rt_gpu_queue target_queue;
} rt_command_buffer;
static rt_thread_pools *_pools;
static uint32_t _next_pools;
static RT_THREAD_LOCAL unsigned int t_first_pool;
static rt_command_buffer *_command_buffers;
/* We let this overflow on its own. Use MOD rt_VkCommandBufferRingBufferSize to get the actual
* index. */
static uint32_t _next_command_buffer;
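/* Worked example (illustrative, not from the original code, assuming the default ring size of
 * 512): even when the fetch-add wraps past UINT32_MAX, (start + i) % 512 stays consistent,
 * because a power-of-two ring size divides 2^32 evenly. This is why the cvar demands a power of
 * two. */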
rt_result InitCommandBufferManagement(void) {
_pools = calloc((size_t)rt_VkMaxCommandPools.i, sizeof(rt_thread_pools));
if (!_pools)
return RT_OUT_OF_MEMORY;
_command_buffers =
calloc((size_t)rt_VkCommandBufferRingBufferSize.i, sizeof(rt_command_buffer));
if (!_command_buffers) {
free(_pools);
return RT_OUT_OF_MEMORY;
}
/* We keep 0 free as a "Not initialized" value for t_first_pool.
* The atomicinc used to acquire a pool returns the incremented value, so 0 is never returned.
*/
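    /* Example (illustrative, not from the original code): the first thread that allocates a
     * command buffer gets t_first_pool == 1 and works with _pools[1]; _pools[0] is never handed
     * out. */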
_next_pools = 0;
return RT_SUCCESS;
}
static void DestroyPools(rt_thread_pools *pools) {
    for (uint32_t j = 0; j < pools->distinct_pool_count; ++j)
        vkDestroyCommandPool(g_gpu.device, pools->pools[j], g_gpu.alloc_cb);
}
void ShutdownCommandBufferManagement(void) {
    /* Pools with indices 1.._next_pools are in use; index 0 is reserved as "not initialized". */
    for (uint32_t i = 1; i <= _next_pools; ++i) {
        DestroyPools(&_pools[i]);
    }
    free(_pools);
    free(_command_buffers);
}
void rtResetCommandPools(unsigned int frame_id) {
unsigned int pool_idx = frame_id % g_gpu.max_frames_in_flight;
    for (uint32_t i = 1; i <= _next_pools; ++i) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].graphics_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset graphics pool slot %u index %u", i, pool_idx);
}
if (_pools[i].compute_pools != _pools[i].graphics_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].compute_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset compute pool slot %u index %u", i, pool_idx);
}
}
if (_pools[i].transfer_pools != _pools[i].graphics_pools &&
_pools[i].transfer_pools != _pools[i].compute_pools) {
if (vkResetCommandPool(g_gpu.device,
_pools[i].transfer_pools[pool_idx],
VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != VK_SUCCESS) {
rtLog("vk", "Failed to reset transfer pool slot %u index %u", i, pool_idx);
}
}
}
}
static rt_result CreatePools(rt_thread_pools *pools) {
/* Graphics pools */
pools->graphics_pools = pools->pools;
pools->distinct_pool_count = 0;
VkCommandPoolCreateInfo graphics_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.graphics_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT};
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&graphics_info,
g_gpu.alloc_cb,
&pools->graphics_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a graphics command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
if (g_gpu.compute_family != g_gpu.graphics_family) {
VkCommandPoolCreateInfo compute_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.compute_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->compute_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&compute_info,
g_gpu.alloc_cb,
&pools->compute_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a compute command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else {
pools->compute_pools = pools->graphics_pools;
}
if (g_gpu.transfer_family != g_gpu.graphics_family &&
g_gpu.transfer_family != g_gpu.compute_family) {
VkCommandPoolCreateInfo transfer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = g_gpu.transfer_family,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
};
pools->transfer_pools = &pools->pools[pools->distinct_pool_count];
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
if (vkCreateCommandPool(g_gpu.device,
&transfer_info,
g_gpu.alloc_cb,
&pools->transfer_pools[i]) != VK_SUCCESS) {
rtLog("vk", "Failed to create a transfer command pool.");
DestroyPools(pools);
return RT_UNKNOWN_ERROR;
}
++pools->distinct_pool_count;
}
} else if (g_gpu.transfer_family == g_gpu.graphics_family) {
pools->transfer_pools = pools->graphics_pools;
} else if (g_gpu.transfer_family == g_gpu.compute_family) {
pools->transfer_pools = pools->compute_pools;
}
return RT_SUCCESS;
}
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
const rt_alloc_command_buffer_info *info,
rt_command_buffer_handle *p_command_buffers) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
        if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
            return RT_OUT_OF_MEMORY;
        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return create_res;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return RT_OUT_OF_MEMORY;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
rt_result result = RT_SUCCESS;
/* TODO: We should probably batch allocations of the same type */
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
uint32_t start = rtAtomic32FetchAdd(&_next_command_buffer, count);
for (uint32_t i = 0; i < count; ++i) {
uint32_t slot = (start + i) % mod;
_command_buffers[slot].version =
(_command_buffers[slot].version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
if (_command_buffers[slot].version == 0)
_command_buffers[slot].version = 1;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (info[i].target_queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (info[i].target_queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
_command_buffers[slot].target_queue = info[i].target_queue;
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
if (vkAllocateCommandBuffers(g_gpu.device,
&alloc_info,
&_command_buffers[slot].command_buffer) != VK_SUCCESS) {
result = RT_UNKNOWN_ERROR;
break;
}
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(_command_buffers[slot].command_buffer, &begin_info);
p_command_buffers[i].index = (slot + 1);
p_command_buffers[i].version = _command_buffers[slot].version;
}
return result;
}
#define RT_VK_LOG_SUBMIT_INFO 1
rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
const rt_submit_command_buffers_info *info) {
uint32_t count = info->command_buffer_count;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
rt_result result = RT_SUCCESS;
VkQueue target_queue = rtGetQueue(queue);
VkCommandBufferSubmitInfo *command_buffers =
RT_ARENA_PUSH_ARRAY(temp.arena, VkCommandBufferSubmitInfo, count);
if (!command_buffers) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *wait_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->wait_semaphore_count);
if (!wait_semaphores && info->wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphores =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, info->signal_semaphore_count);
if (!signal_semaphores && info->signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = info->wait_semaphore_count;
uint32_t signal_count = info->signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->wait_semaphores[i]),
.value = info->wait_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = rtGetSemaphore(info->signal_semaphores[i]),
.value = info->signal_values[i],
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphores[i] = semaphore_info;
}
for (uint32_t i = 0; i < count; ++i) {
if (!RT_IS_HANDLE_VALID(info->command_buffers[i])) {
rtLog("vk", "Tried to submit an invalid command buffer.");
result = RT_INVALID_VALUE;
goto out;
}
uint32_t slot = info->command_buffers[i].index - 1;
if (_command_buffers[slot].version != info->command_buffers[i].version) {
rtLog("vk",
"Mismatch between handle version and stored version while submitting a command "
"buffer");
result = RT_INVALID_VALUE;
goto out;
}
if (_command_buffers[slot].target_queue != queue) {
rtLog("vk", "Mismatch between command buffer target queue and submit target queue.");
result = RT_INVALID_VALUE;
goto out;
}
command_buffers[i].sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
command_buffers[i].pNext = NULL;
command_buffers[i].deviceMask = 0;
command_buffers[i].commandBuffer = _command_buffers[slot].command_buffer;
vkEndCommandBuffer(command_buffers[i].commandBuffer);
}
#if RT_VK_LOG_SUBMIT_INFO
{
const char *queue_str = "<invalid>";
if (queue == RT_GRAPHICS_QUEUE)
queue_str = "GRAPHICS";
else if (queue == RT_COMPUTE_QUEUE)
queue_str = "COMPUTE";
else if (queue == RT_TRANSFER_QUEUE)
queue_str = "TRANSFER";
rtLog("vk", "Submit Info");
rtLog("vk", "Queue: %s", queue_str);
rtLog("vk", "Command Buffers: %u", count);
rtLog("vk", " - TODO: More Info");
rtLog("vk", "Wait Semaphores:");
for (uint32_t i = 0; i < wait_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->wait_semaphores[i].version,
info->wait_semaphores[i].index,
info->wait_values[i]);
}
rtLog("vk", "Signal Semaphores:");
for (uint32_t i = 0; i < signal_count; ++i) {
rtLog("vk",
" - %u:%u Value %u",
info->signal_semaphores[i].version,
info->signal_semaphores[i].index,
info->signal_values[i]);
}
}
#endif
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphores,
.pSignalSemaphoreInfos = signal_semaphores,
.commandBufferInfoCount = count,
.pCommandBufferInfos = command_buffers,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, VK_NULL_HANDLE) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}
VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf) {
uint32_t mod = (uint32_t)rt_VkCommandBufferRingBufferSize.i;
if (!RT_IS_HANDLE_VALID(cmdbuf))
return VK_NULL_HANDLE;
uint32_t slot = (cmdbuf.index - 1) % mod;
if (_command_buffers[slot].version != cmdbuf.version) {
return VK_NULL_HANDLE;
}
return _command_buffers[slot].command_buffer;
}
VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue) {
    rt_thread_pools *pools = &_pools[t_first_pool];
    if (t_first_pool == 0) {
        /* Acquire pools */
        t_first_pool = rtAtomic32Inc(&_next_pools);
        RT_ASSERT((int)t_first_pool < rt_VkMaxCommandPools.i, "Too many command pools created.");
        if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
            return VK_NULL_HANDLE;
        pools = &_pools[t_first_pool];
        rt_result create_res = CreatePools(pools);
        if (create_res != RT_SUCCESS)
            return VK_NULL_HANDLE;
    }
    if ((int)t_first_pool >= rt_VkMaxCommandPools.i)
        return VK_NULL_HANDLE;
uint32_t frame_id = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
VkCommandPool pool = pools->graphics_pools[frame_id];
if (queue == RT_COMPUTE_QUEUE)
pool = pools->compute_pools[frame_id];
else if (queue == RT_TRANSFER_QUEUE)
pool = pools->transfer_pools[frame_id];
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
.commandPool = pool,
};
VkCommandBuffer cmdbuf;
if (vkAllocateCommandBuffers(g_gpu.device, &alloc_info, &cmdbuf) != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
return cmdbuf;
}
rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
const VkSemaphore *wait_semaphores,
const uint32_t *wait_values,
uint32_t wait_semaphore_count,
const VkSemaphore *signal_semaphores,
const uint32_t *signal_values,
uint32_t signal_semaphore_count,
rt_gpu_queue queue,
VkFence fence) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return RT_OUT_OF_MEMORY;
VkQueue target_queue = rtGetQueue(queue);
rt_result result = RT_SUCCESS;
VkSemaphoreSubmitInfo *wait_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, wait_semaphore_count);
if (!wait_semaphore_info && wait_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
VkSemaphoreSubmitInfo *signal_semaphore_info =
RT_ARENA_PUSH_ARRAY(temp.arena, VkSemaphoreSubmitInfo, signal_semaphore_count);
if (!signal_semaphore_info && signal_semaphore_count > 0) {
result = RT_OUT_OF_MEMORY;
goto out;
}
uint32_t wait_count = wait_semaphore_count;
uint32_t signal_count = signal_semaphore_count;
for (uint32_t i = 0; i < wait_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = wait_semaphores[i],
.value = (wait_values) ? wait_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
wait_semaphore_info[i] = semaphore_info;
}
for (uint32_t i = 0; i < signal_count; ++i) {
VkSemaphoreSubmitInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = signal_semaphores[i],
.value = (signal_values) ? signal_values[i] : 0,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
signal_semaphore_info[i] = semaphore_info;
}
VkCommandBufferSubmitInfo command_buffer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.deviceMask = 0,
.commandBuffer = command_buffer,
};
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = wait_count,
.signalSemaphoreInfoCount = signal_count,
.pWaitSemaphoreInfos = wait_semaphore_info,
.pSignalSemaphoreInfos = signal_semaphore_info,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &command_buffer_info,
};
if (vkQueueSubmit2(target_queue, 1, &submit_info, fence) != VK_SUCCESS) {
rtLog("vk", "vkQueueSubmit failed.");
result = RT_UNKNOWN_ERROR;
}
out:
rtReturnTemporaryArena(temp);
return result;
}

View File

@@ -1,25 +0,0 @@
#ifndef RT_COMMAND_BUFFERS_H
#define RT_COMMAND_BUFFERS_H
#include "gfx/renderer_api.h"
#include "runtime/runtime.h"
#include <volk/volk.h>
void rtResetCommandPools(unsigned int frame_id);
VkCommandBuffer rtGetCommandBuffer(rt_command_buffer_handle cmdbuf);
VkCommandBuffer rtAllocSingleCommandBuffer(rt_gpu_queue queue);
rt_result rtSubmitSingleCommandBuffer(VkCommandBuffer command_buffer,
const VkSemaphore *wait_semaphores,
const uint32_t *wait_values,
uint32_t wait_semaphore_count,
const VkSemaphore *signal_semaphores,
const uint32_t *signal_values,
uint32_t signal_semaphore_count,
rt_gpu_queue queue,
VkFence fence);
#endif

View File

@@ -1,510 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "gfx/renderer_api.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include <string.h>
#define USE_SIMPLE_SYNC_LIB 0
#if USE_SIMPLE_SYNC_LIB
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
#include <stdbool.h>
#endif
/* Retrieve the VkCommandBuffer as varname, or return */
#define GET_CMDBUF(varname, handle) \
VkCommandBuffer varname = rtGetCommandBuffer((handle)); \
if (varname == VK_NULL_HANDLE) { \
rtLog("vk", "Failed to retrive VkCommandBuffer for %s", __FUNCTION__); \
return; \
}
void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle cmdbuf_handle,
const rt_cmd_begin_pass_info *info) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena) {
rtReportError("vk", "Failed to acquire a temporary arena for CmdBeginPass");
return;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.color = {0.39f, 0.58f, 0.92f, 1.f},
.pLabelName = (info->name) ? info->name : "Unnamed pass",
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
/* Acquire the necessary attachments */
VkRenderingAttachmentInfo *colorbuffers =
RT_ARENA_PUSH_ARRAY_ZERO(temp.arena, VkRenderingAttachmentInfo, info->color_buffer_count);
for (uint32_t i = 0; i < info->color_buffer_count; ++i) {
VkImageView image_view = VK_NULL_HANDLE;
if (RT_IS_HANDLE_VALID(info->color_buffers[i])) {
rt_render_target *rt = rtGetRenderTarget(info->color_buffers[i]);
if (rt)
image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];
}
colorbuffers[i].sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
colorbuffers[i].pNext = NULL;
colorbuffers[i].imageView = image_view;
colorbuffers[i].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
switch (info->color_buffer_loads[i]) {
case RT_PASS_LOAD_MODE_CLEAR:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case RT_PASS_LOAD_MODE_LOAD:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
default:
colorbuffers[i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (info->color_buffer_writes[i]) {
case RT_PASS_WRITE_MODE_STORE:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
break;
case RT_PASS_WRITE_MODE_DISCARD:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
break;
default:
colorbuffers[i].storeOp = VK_ATTACHMENT_STORE_OP_NONE;
break;
}
memcpy(&colorbuffers[i].clearValue.color.float32,
info->color_buffer_clear_values[i].color.v,
sizeof(float) * 4);
/* TODO: Multisample resolve */
colorbuffers[i].resolveMode = VK_RESOLVE_MODE_NONE;
colorbuffers[i].resolveImageView = VK_NULL_HANDLE;
colorbuffers[i].resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
/* depth and stencil might be the same */
VkRenderingAttachmentInfo *depth_stencil_buffer =
RT_IS_HANDLE_VALID(info->depth_stencil_buffer)
? RT_ARENA_PUSH_STRUCT_ZERO(temp.arena, VkRenderingAttachmentInfo)
: NULL;
if (depth_stencil_buffer) {
VkImageView image_view = VK_NULL_HANDLE;
rt_render_target *rt = rtGetRenderTarget(info->depth_stencil_buffer);
if (rt)
image_view = rt->view[g_gpu.current_frame_id % g_gpu.max_frames_in_flight];
depth_stencil_buffer->sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
depth_stencil_buffer->pNext = NULL;
depth_stencil_buffer->imageView = image_view;
depth_stencil_buffer->imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
switch (info->depth_stencil_buffer_load) {
case RT_PASS_LOAD_MODE_CLEAR:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case RT_PASS_LOAD_MODE_LOAD:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
default:
depth_stencil_buffer->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (info->depth_stencil_buffer_write) {
case RT_PASS_WRITE_MODE_STORE:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
break;
case RT_PASS_WRITE_MODE_DISCARD:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
break;
default:
depth_stencil_buffer->storeOp = VK_ATTACHMENT_STORE_OP_NONE;
break;
}
/* TODO: Multisample resolve */
depth_stencil_buffer->resolveMode = VK_RESOLVE_MODE_NONE;
depth_stencil_buffer->resolveImageView = VK_NULL_HANDLE;
depth_stencil_buffer->resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
}
VkRect2D render_area = {
.offset = { .x = info->render_area.offset.x, .y = info->render_area.offset.y},
.extent = {.width = info->render_area.size.x, .height = info->render_area.size.y}
};
if (render_area.extent.width == 0)
render_area.extent.width = g_swapchain.extent.width;
if (render_area.extent.height == 0)
render_area.extent.height = g_swapchain.extent.height;
VkRenderingInfo rendering_info = {
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
.pColorAttachments = colorbuffers,
.colorAttachmentCount = info->color_buffer_count,
.pDepthAttachment = depth_stencil_buffer,
.pStencilAttachment = depth_stencil_buffer,
.layerCount = 1,
.renderArea = render_area,
};
vkCmdBeginRendering(cmdbuf, &rendering_info);
rtReturnTemporaryArena(temp);
}
void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle cmdbuf_handle) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
vkCmdEndRendering(cmdbuf);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
/* Non-layout transition barrier */
static void ExecuteRenderTargetBarrier(rt_render_target *rt,
uint32_t image_index,
                                       VkCommandBuffer cmdbuf) {
    /* Determine the current layout; this barrier keeps the layout unchanged. */
VkImageLayout layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
layout = VK_IMAGE_LAYOUT_UNDEFINED;
break;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = "Render Target Barrier",
.color = {.13f, .54f, .13f, .75f},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
/* Determine access flags */
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
dst_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
dst_access = VK_ACCESS_2_SHADER_READ_BIT;
src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
dst_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
}
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = layout,
.newLayout = layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmdbuf, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
}
static void DoLayoutTransition(rt_render_target *rt,
uint32_t image_index,
rt_render_target_state new_state,
VkCommandBuffer cmdbuf) {
#if !USE_SIMPLE_SYNC_LIB
/* Determine old and new layout */
VkImageLayout old_layout;
switch (rt->states[image_index]) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
old_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
old_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
old_layout = VK_IMAGE_LAYOUT_UNDEFINED;
break;
}
VkImageLayout new_layout;
switch (new_state) {
case RT_RENDER_TARGET_STATE_ATTACHMENT:
new_layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
break;
case RT_RENDER_TARGET_STATE_STORAGE_IMAGE:
case RT_RENDER_TARGET_STATE_SAMPLED_IMAGE:
new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
break;
default:
new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
}
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = "Transition Render Target",
.color = {.13f, .54f, .13f, .75f},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuf, &debug_label);
#endif
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
VkPipelineStageFlags2 src_stage = 0;
VkPipelineStageFlags2 dst_stage = 0;
/* Determine access flags */
VkAccessFlags2 src_access = 0;
VkAccessFlags2 dst_access = 0;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
} else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
src_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
src_stage =
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; // VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
// VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
}
if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
dst_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
dst_stage = (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else { /* SAMPLED_IMAGE or STORAGE_IMAGE */
        dst_access = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT;
        dst_stage =
            VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
    }
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = src_stage,
.srcAccessMask = src_access,
.dstStageMask = dst_stage,
.dstAccessMask = dst_access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = rt->image[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect_mask,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmdbuf, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
#endif
#else
    ThsvsAccessType prev_access = THSVS_ACCESS_NONE;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
else
prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_INVALID) {
prev_access = THSVS_ACCESS_NONE;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
prev_access = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
} else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
prev_access = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
}
ThsvsAccessType next_accesses[2];
uint32_t next_access_count = 0;
if (new_state == RT_RENDER_TARGET_STATE_ATTACHMENT) {
if (rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT) {
next_accesses[0] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ;
next_accesses[1] = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE;
} else {
next_accesses[0] = THSVS_ACCESS_COLOR_ATTACHMENT_READ;
next_accesses[1] = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE;
}
next_access_count = 2;
    } else if (new_state == RT_RENDER_TARGET_STATE_INVALID) {
        next_accesses[0] = THSVS_ACCESS_NONE;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_SAMPLED_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER;
        next_access_count = 1;
    } else if (new_state == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        next_accesses[0] = THSVS_ACCESS_ANY_SHADER_READ_OTHER;
        next_accesses[1] = THSVS_ACCESS_ANY_SHADER_WRITE;
        next_access_count = 2;
    }
VkImageAspectFlags aspect_mask =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT
: VK_IMAGE_ASPECT_COLOR_BIT;
ThsvsImageBarrier barrier = {0};
barrier.image = rt->image[image_index];
barrier.pPrevAccesses = &prev_access;
barrier.prevAccessCount = 1;
barrier.prevLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
barrier.nextAccessCount = next_access_count;
barrier.pNextAccesses = next_accesses;
barrier.nextLayout = THSVS_IMAGE_LAYOUT_OPTIMAL;
barrier.discardContents = false;
barrier.subresourceRange.aspectMask = aspect_mask;
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.layerCount = 1;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = 1;
thsvsCmdPipelineBarrier(cmdbuf, NULL, 0, NULL, 1, &barrier);
#endif
rt->states[image_index] = new_state;
}
void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target,
rt_render_target_state new_state) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
if (render_target.index == rtGetSwapchainRenderTarget().index) {
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
}
rt_render_target *rt = rtGetRenderTarget(render_target);
if (!rt) {
rtLog("vk", "Tried to transition invalid render target");
return;
}
if (rt->states[image_index] != new_state)
DoLayoutTransition(rt, image_index, new_state, cmdbuf);
else
ExecuteRenderTargetBarrier(rt, image_index, cmdbuf);
}
void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdbuf_handle,
rt_render_target_handle render_target) {
GET_CMDBUF(cmdbuf, cmdbuf_handle)
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
if (render_target.index == rtGetSwapchainRenderTarget().index) {
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
}
rt_render_target *rt = rtGetRenderTarget(render_target);
if (!rt) {
rtLog("vk", "Tried to flush invalid render target");
return;
}
VkAccessFlags2 src_access;
VkPipelineStageFlags2 src_stage;
if (rt->states[image_index] == RT_RENDER_TARGET_STATE_ATTACHMENT) {
src_access =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
: VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
src_stage =
(rt->format == VK_FORMAT_D24_UNORM_S8_UINT || rt->format == VK_FORMAT_D32_SFLOAT)
? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR
: VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
    } else if (rt->states[image_index] == RT_RENDER_TARGET_STATE_STORAGE_IMAGE) {
        /* Only storage images can have pending shader writes that need to be flushed. */
        src_access = VK_ACCESS_2_MEMORY_WRITE_BIT;
        src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
    } else {
        return;
    }
VkMemoryBarrier2 barrier = {.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
.srcAccessMask = src_access,
.srcStageMask = src_stage,
.dstAccessMask = 0,
.dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT};
VkDependencyInfo dep = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.memoryBarrierCount = 1,
.pMemoryBarriers = &barrier,
};
vkCmdPipelineBarrier2(cmdbuf, &dep);
}

View File

@@ -1,139 +0,0 @@
#include "command_buffers.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "transfers.h"
#include "gfx/renderer_api.h"
#define ONE_SECOND_NS 1000000000u
void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
g_gpu.current_frame_id = frame_id;
rt_frame_data *frame = rtGetFrameData(frame_id);
/* Wait until the previous frame is done */
VkFence fence = g_swapchain.image_fences[frame_id % g_swapchain.image_count];
RT_VK_CHECK(vkWaitForFences(g_gpu.device, 1, &fence, VK_TRUE, ONE_SECOND_NS));
RT_VK_CHECK(vkResetFences(g_gpu.device, 1, &fence));
rtResetCommandPools(frame_id);
VkResult acquire_res = vkAcquireNextImageKHR(g_gpu.device,
g_swapchain.swapchain,
ONE_SECOND_NS,
frame->image_available,
fence,
&frame->swapchain_image_index);
if (acquire_res == VK_SUBOPTIMAL_KHR || acquire_res == VK_ERROR_OUT_OF_DATE_KHR) {
/* We need to recreate the swapchain and try again */
rtLog("vk", "Swapchain has become suboptimal and needs to be re-created.");
vkDeviceWaitIdle(g_gpu.device);
if (rtRecreateSwapchain() != RT_SUCCESS) {
rtReportError("vk", "Failed to recreate the swapchain.");
return;
}
rtUpdateSwapchainRenderTarget();
rtUpdateRenderTargetsFromSwapchain(g_swapchain.image_count,
g_swapchain.format,
g_swapchain.extent);
rtRenBeginFrame(frame_id);
} else if (acquire_res != VK_SUCCESS) {
rtReportError("vk", "vkAcquireNextImageKHR failed: %u", acquire_res);
}
/* Update the swapchain render target */
rt_render_target_handle swap_rt_handle = rtGetSwapchainRenderTarget();
rt_render_target *swap_rt = rtGetRenderTarget(swap_rt_handle);
swap_rt->states[frame->swapchain_image_index] = RT_RENDER_TARGET_STATE_INVALID;
}
void RT_RENDERER_API_FN(EndFrame)(unsigned int frame_id) {
rt_frame_data *frame = rtGetFrameData(frame_id);
uint32_t image_index = frame->swapchain_image_index;
/* Transition the swap chain image to the correct layout */
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_GRAPHICS_QUEUE);
if (cmd == VK_NULL_HANDLE) {
rtReportError("vk",
"Failed to allocate a command buffer for transitioning the swapchain image "
"to PRESENT_SRC layout.");
return;
}
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
#ifdef RT_DEBUG
VkDebugUtilsLabelEXT debug_label = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.color = {.13f, .54f, .13f, 1.f},
.pLabelName = "Transition Swapchain"
};
vkCmdBeginDebugUtilsLabelEXT(cmd, &debug_label);
#endif
VkImageMemoryBarrier2 image_barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT | VK_ACCESS_2_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.image = g_swapchain.images[image_index],
/* clang-format off */
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = &image_barrier,
.imageMemoryBarrierCount = 1,
};
vkCmdPipelineBarrier2(cmd, &dep_info);
#ifdef RT_DEBUG
vkCmdEndDebugUtilsLabelEXT(cmd);
#endif
vkEndCommandBuffer(cmd);
if (rtSubmitSingleCommandBuffer(cmd,
&frame->render_finished,
NULL,
1,
&frame->swapchain_transitioned,
NULL,
1,
RT_GRAPHICS_QUEUE,
VK_NULL_HANDLE) != RT_SUCCESS) {
rtReportError("vk", "Failed to submit the layout transition for the swapchain image.");
return;
}
VkPresentInfoKHR present_info = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pImageIndices = &image_index,
.pSwapchains = &g_swapchain.swapchain,
.swapchainCount = 1,
.pWaitSemaphores = &frame->swapchain_transitioned,
.waitSemaphoreCount = 1,
};
VkResult res = vkQueuePresentKHR(g_gpu.present_queue, &present_info);
if (res != VK_SUCCESS) {
rtReportError("vk", "vkQueuePresentKHR failed: %u", res);
}
rtFlushGPUTransfers();
}

View File

@@ -1,22 +0,0 @@
#ifndef RT_VK_FRAMEBUFFER_H
#define RT_VK_FRAMEBUFFER_H
#include <volk/volk.h>
typedef struct {
VkFramebuffer framebuffer;
uint32_t pass_idx;
} rt_framebuffer;
typedef struct {
uint32_t index;
} rt_framebuffer_handle;
/* Reserve a slot, but don't actually create the framebuffer yet.
 * Useful when it is not yet known whether the framebuffer will actually be needed.
*/
rt_framebuffer_handle rt_reserve_framebuffer(void);
rt_framebuffer *rt_get_framebuffer(rt_framebuffer_handle handle);
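/* Hypothetical usage sketch (not part of the original header): reserve a slot while building the
 * render graph and create the VkFramebuffer lazily on first use:
 *
 *   rt_framebuffer_handle handle = rt_reserve_framebuffer();
 *   ...
 *   rt_framebuffer *fb = rt_get_framebuffer(handle);
 *   if (fb && fb->framebuffer == VK_NULL_HANDLE) {
 *       // create the framebuffer for the pass identified by fb->pass_idx now
 *   }
 */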
#endif

View File

@ -1,107 +0,0 @@
#ifndef RT_VK_GPU_H
#define RT_VK_GPU_H
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vma/vk_mem_alloc.h>
#include "gfx/renderer_api.h"
/* Used to mark a resource as not owned by a particular queue */
#define RT_VK_UNOWNED 255
/* Minimum supported value of g_gpu.max_frames_in_flight */
#define RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT 2
/* Maximum supported number of frames in flight.
* The actually configured value is contained in g_gpu. */
#define RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT 3
#ifdef _WIN32
struct HINSTANCE__;
struct HWND__;
#elif defined(RT_USE_XLIB)
struct _XDisplay;
#endif
typedef struct {
#ifdef _WIN32
struct HINSTANCE__ *hInstance;
struct HWND__ *hWnd;
#elif defined(RT_USE_XLIB)
struct _XDisplay *display;
unsigned long window;
#endif
} rt_native_window;
typedef struct {
uint32_t swapchain_image_index;
VkSemaphore image_available;
VkSemaphore render_finished;
VkSemaphore swapchain_transitioned;
} rt_frame_data;
typedef struct {
VkInstance instance;
VkDebugUtilsMessengerEXT messenger;
VkAllocationCallbacks *alloc_cb;
VkPhysicalDevice phys_device;
VkDevice device;
VkSurfaceKHR surface;
VkQueue graphics_queue;
VkQueue compute_queue;
VkQueue present_queue;
VkQueue transfer_queue;
uint32_t graphics_family;
uint32_t compute_family;
uint32_t present_family;
uint32_t transfer_family;
rt_native_window native_window;
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props;
VkPhysicalDeviceProperties phys_device_props;
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features;
VkPhysicalDeviceFeatures phys_device_features;
VmaAllocator allocator;
unsigned int max_frames_in_flight;
unsigned int current_frame_id;
rt_frame_data frames[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
} rt_vk_gpu;
#ifndef RT_VK_DONT_DEFINE_GPU_GLOBAL
extern rt_vk_gpu g_gpu;
RT_INLINE rt_frame_data *rtGetFrameData(unsigned int frame_id) {
return &g_gpu.frames[frame_id % g_gpu.max_frames_in_flight];
}
#endif
/* Helper functions */
#define RT_VK_CHECK(expr) \
do { \
VkResult res = expr; \
if (res != VK_SUCCESS) { \
rtReportError("vk", "Vulkan command failed with error %u.\nCommand: %s", res, #expr); \
} \
} while (0)
VkFormat rtPixelFormatToVkFormat(rt_pixel_format format);
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count);
VkQueue rtGetQueue(rt_gpu_queue queue);
uint32_t rtGetQueueFamily(rt_gpu_queue queue);
const char *rtVkFormatToString(VkFormat format);
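/* Illustrative usage sketch (not part of the original header): combining the queue
 * lookup helper with the RT_VK_CHECK macro declared above. */
RT_INLINE void rt_example_flush_graphics_queue(void) {
    VkQueue graphics = rtGetQueue(RT_GRAPHICS_QUEUE);
    if (graphics != VK_NULL_HANDLE)
        RT_VK_CHECK(vkQueueWaitIdle(graphics));
}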
#endif

View File

@ -1,192 +0,0 @@
#include "gpu.h"
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include <stdlib.h>
RT_CVAR_I(rt_VkMaxSemaphores, "Maximum number of semaphores. Default: 1024", 1024);
#define SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX 0xffffff
#define RENDER_FINISHED_SEMAPHORE_INDEX 0xfffffe
typedef struct rt_gpu_semaphore_s {
uint32_t version;
VkSemaphore semaphore[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
uint64_t current_value[RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT];
/* TODO: State tracking. We want to ensure that we don't introduce gpu hangs by waiting on a
* not-signaled semaphore. */
struct rt_gpu_semaphore_s *next_free;
} rt_gpu_semaphore;
static rt_gpu_semaphore *_semaphores;
static rt_gpu_semaphore *_first_free;
static rt_mutex *_lock;
static void DestroySemaphore(rt_gpu_semaphore *s) {
for (uint32_t i = 0; i < g_gpu.max_frames_in_flight; ++i) {
vkDestroySemaphore(g_gpu.device, s->semaphore[i], g_gpu.alloc_cb);
s->semaphore[i] = VK_NULL_HANDLE;
}
rtLockMutex(_lock);
s->next_free = _first_free;
_first_free = s;
rtUnlockMutex(_lock);
}
rt_result InitializeSemaphoreManagement(void) {
_semaphores = calloc(rt_VkMaxSemaphores.i, sizeof(rt_gpu_semaphore));
if (!_semaphores)
return RT_OUT_OF_MEMORY;
_lock = rtCreateMutex();
if (!_lock) {
free(_semaphores);
return RT_UNKNOWN_ERROR;
}
/* Keep 0 unused for the invalid handle */
_first_free = &_semaphores[1];
for (int i = 1; i < rt_VkMaxSemaphores.i - 1; ++i)
_semaphores[i].next_free = &_semaphores[i + 1];
_semaphores[rt_VkMaxSemaphores.i - 1].next_free = NULL;
return RT_SUCCESS;
}
void ShutdownSemaphoreManagement(void) {
for (int i = 1; i < rt_VkMaxSemaphores.i; ++i) {
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j)
vkDestroySemaphore(g_gpu.device, _semaphores[i].semaphore[j], g_gpu.alloc_cb);
}
}
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
const rt_gpu_semaphore_info *info,
rt_gpu_semaphore_handle *p_semaphores) {
for (uint32_t i = 0; i < count; ++i) {
rtLockMutex(_lock);
rt_gpu_semaphore *sem = _first_free;
if (sem)
_first_free = sem->next_free;
rtUnlockMutex(_lock);
if (!sem) {
for (uint32_t j = 0; j < i; ++j) {
uint32_t index = p_semaphores[j].index;
DestroySemaphore(&_semaphores[index]);
}
return RT_OUT_OF_MEMORY;
}
sem->version = (sem->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
for (uint32_t j = 0; j < g_gpu.max_frames_in_flight; ++j) {
VkSemaphoreTypeCreateInfo type_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
.initialValue = info[i].initial_value,
};
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &type_info,
};
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&sem->semaphore[j]) != VK_SUCCESS) {
for (uint32_t k = 0; k < i; ++k) {
uint32_t index = p_semaphores[k].index;
DestroySemaphore(&_semaphores[index]);
}
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
char name[128];
rtSPrint(name, 128, "%s (%u)", (info[i].name) ? info[i].name : "Unnamed Semaphore", j);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)sem->semaphore[j],
.objectType = VK_OBJECT_TYPE_SEMAPHORE,
.pObjectName = name,
};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
sem->current_value[j] = 0;
}
p_semaphores[i].version = (unsigned char)sem->version;
p_semaphores[i].index = (uint32_t)(sem - _semaphores);
}
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
for (uint32_t i = 0; i < count; ++i) {
uint32_t index = semaphores[i].index;
if (index >= (uint32_t)rt_VkMaxSemaphores.i)
continue;
if (semaphores[i].version != _semaphores[index].version) {
rtLog("vk",
"Tried to destroy semaphore %u with version %u, but the semaphore has version %u",
index,
semaphores[i].version,
_semaphores[index].version);
continue;
}
DestroySemaphore(&_semaphores[index]);
}
}
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle) {
uint32_t index = handle.index;
if (index == SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX) {
rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
return fd->image_available;
} else if (index == RENDER_FINISHED_SEMAPHORE_INDEX) {
rt_frame_data *fd = rtGetFrameData(g_gpu.current_frame_id);
return fd->render_finished;
}
if (!RT_IS_HANDLE_VALID(handle) || index >= (uint32_t)rt_VkMaxSemaphores.i)
return VK_NULL_HANDLE;
if (_semaphores[index].version != handle.version)
return VK_NULL_HANDLE;
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
return _semaphores[index].semaphore[frame];
}
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle semaphore) {
uint32_t index = semaphore.index;
if (!RT_IS_HANDLE_VALID(semaphore) || index >= (uint32_t)rt_VkMaxSemaphores.i)
return 0;
if (_semaphores[index].version != semaphore.version)
return 0;
uint32_t frame = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
vkGetSemaphoreCounterValue(g_gpu.device,
_semaphores[index].semaphore[frame],
&_semaphores[index].current_value[frame]);
return _semaphores[index].current_value[frame];
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
return (rt_gpu_semaphore_handle){
.version = 1,
.index = SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX,
};
}
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
return (rt_gpu_semaphore_handle){
.version = 1,
.index = RENDER_FINISHED_SEMAPHORE_INDEX,
};
}
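/* Illustrative usage sketch (not part of the original file): resolving one of the
 * special per-frame handles into the VkSemaphore of the current frame and waiting
 * on it at submit time. The submit wiring shown here is an assumption made for
 * this example. */
static void ExampleWaitOnSwapchainImage(VkCommandBuffer cmd, VkQueue queue) {
    rt_gpu_semaphore_handle swapchain_available = {
        .version = 1,
        .index = SWAPCHAIN_AVAILABLE_SEMAPHORE_INDEX,
    };
    VkSemaphore image_available = rtGetSemaphore(swapchain_available);
    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    VkSubmitInfo submit = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .waitSemaphoreCount = 1,
        .pWaitSemaphores = &image_available,
        .pWaitDstStageMask = &wait_stage,
        .commandBufferCount = 1,
        .pCommandBuffers = &cmd,
    };
    vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);
}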

View File

@ -1,10 +0,0 @@
#ifndef RT_VK_GPU_SYNC_H
#define RT_VK_GPU_SYNC_H
#include <volk/volk.h>
#include "gfx/renderer_api.h"
VkSemaphore rtGetSemaphore(rt_gpu_semaphore_handle handle);
#endif

View File

@ -1,97 +0,0 @@
#include "gpu.h"
VkFormat rtPixelFormatToVkFormat(rt_pixel_format format) {
switch (format) {
case RT_PIXEL_FORMAT_R8G8B8A8_UNORM:
return VK_FORMAT_R8G8B8A8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8A8_UNORM:
return VK_FORMAT_B8G8R8A8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8A8_SRGB:
return VK_FORMAT_R8G8B8A8_SRGB;
case RT_PIXEL_FORMAT_B8G8R8A8_SRGB:
return VK_FORMAT_B8G8R8A8_SRGB;
case RT_PIXEL_FORMAT_R8G8B8_UNORM:
return VK_FORMAT_R8G8B8_UNORM;
case RT_PIXEL_FORMAT_B8G8R8_UNORM:
return VK_FORMAT_B8G8R8_UNORM;
case RT_PIXEL_FORMAT_R8G8B8_SRGB:
return VK_FORMAT_R8G8B8_SRGB;
case RT_PIXEL_FORMAT_B8G8R8_SRGB:
return VK_FORMAT_B8G8R8_SRGB;
case RT_PIXEL_FORMAT_DEPTH24_STENCIL8:
return VK_FORMAT_D24_UNORM_S8_UINT;
case RT_PIXEL_FORMAT_DEPTH32:
return VK_FORMAT_D32_SFLOAT;
default:
return VK_FORMAT_UNDEFINED;
}
}
VkSampleCountFlagBits rtSampleCountToFlags(unsigned int count) {
/* Limit to what the gpu supports */
VkSampleCountFlags counts = g_gpu.phys_device_props.limits.framebufferColorSampleCounts &
g_gpu.phys_device_props.limits.framebufferDepthSampleCounts &
g_gpu.phys_device_props.limits.sampledImageColorSampleCounts &
g_gpu.phys_device_props.limits.sampledImageDepthSampleCounts;
while (count > 1) {
if ((counts & count) == 0)
count >>= 1;
else
break;
}
return (VkSampleCountFlagBits)count;
}
VkQueue rtGetQueue(rt_gpu_queue queue) {
switch (queue) {
case RT_GRAPHICS_QUEUE:
return g_gpu.graphics_queue;
case RT_COMPUTE_QUEUE:
return g_gpu.compute_queue;
case RT_TRANSFER_QUEUE:
return g_gpu.transfer_queue;
default:
return VK_NULL_HANDLE;
}
}
uint32_t rtGetQueueFamily(rt_gpu_queue queue) {
switch (queue) {
case RT_GRAPHICS_QUEUE:
return g_gpu.graphics_family;
case RT_COMPUTE_QUEUE:
return g_gpu.compute_family;
case RT_TRANSFER_QUEUE:
return g_gpu.transfer_family;
default:
return UINT32_MAX;
}
}
const char *rtVkFormatToString(VkFormat format) {
switch (format) {
case VK_FORMAT_R8G8B8A8_UNORM:
return "R8G8B8A8_UNORM";
case VK_FORMAT_B8G8R8A8_UNORM:
return "B8G8R8A8_UNORM";
case VK_FORMAT_R8G8B8A8_SRGB:
return "R8G8B8A8_SRGB";
case VK_FORMAT_B8G8R8A8_SRGB:
return "B8G8R8A8_SRGB";
case VK_FORMAT_R8G8B8_UNORM:
return "R8G8B8_UNORM";
case VK_FORMAT_B8G8R8_UNORM:
return "B8G8R8_UNORM";
case VK_FORMAT_R8G8B8_SRGB:
return "R8G8B8_SRGB";
case VK_FORMAT_B8G8R8_SRGB:
return "B8G8R8_SRGB";
case VK_FORMAT_D24_UNORM_S8_UINT:
return "D24_UNORM_S8_UINT";
case VK_FORMAT_D32_SFLOAT:
return "D32_SFLOAT";
default:
return "UNDEFINED";
}
}

View File

@ -1,737 +0,0 @@
#include <malloc.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#define RT_VK_DONT_DEFINE_GPU_GLOBAL
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include "runtime/config.h"
#include "runtime/runtime.h"
#include "gfx/renderer_api.h"
#define TARGET_API_VERSION VK_API_VERSION_1_3
RT_CVAR_I(r_VkEnableAPIAllocTracking,
"Enable tracking of allocations done by the vulkan api. [0/1] Default: 0",
0);
RT_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", "");
RT_CVAR_I(r_VkMaxFramesInFlight, "Maximum number of frames in flight. [2/3] Default: 2", 2);
rt_vk_gpu g_gpu;
static VkAllocationCallbacks _tracking_alloc_cbs;
static const char *AllocationScopeToString(VkSystemAllocationScope scope) {
switch (scope) {
case VK_SYSTEM_ALLOCATION_SCOPE_COMMAND:
return "COMMAND";
case VK_SYSTEM_ALLOCATION_SCOPE_OBJECT:
return "OBJECT";
case VK_SYSTEM_ALLOCATION_SCOPE_CACHE:
return "CACHE";
case VK_SYSTEM_ALLOCATION_SCOPE_DEVICE:
return "DEVICE";
case VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE:
return "INSTANCE";
default:
return "UNKNOWN";
}
}
static void *
TrackAllocation(void *userData, size_t size, size_t alignment, VkSystemAllocationScope scope) {
rtLog("vk",
"Allocation. Size: %zu, Alignment: %zu, Scope: %s",
size,
alignment,
AllocationScopeToString(scope));
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
return aligned_alloc(alignment, size);
#endif
}
static void *TrackReallocation(void *userData,
void *original,
size_t size,
size_t alignment,
VkSystemAllocationScope scope) {
rtLog("vk",
"Reallocation. Size: %zu, Alignment: %zu, Scope: %s",
size,
alignment,
AllocationScopeToString(scope));
#ifdef _WIN32
/* Memory from TrackAllocation comes from _aligned_malloc and must stay in the
 * _aligned_* family. */
return _aligned_realloc(original, size, alignment);
#else
return realloc(original, size);
#endif
}
static void TrackFree(void *userData, void *memory) {
#ifdef _WIN32
_aligned_free(memory);
#else
free(memory);
#endif
}
static VkBool32 VKAPI_PTR
DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT types,
const VkDebugUtilsMessengerCallbackDataEXT *callbackData,
void *userData) {
if (severity < VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
return VK_FALSE;
const char *severity_str = "<UNKNOWN>";
if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
severity_str = "WARNING";
else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
severity_str = "ERROR";
rtLog("vk", "[%s] %s", severity_str, callbackData->pMessage);
if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
RT_DEBUGBREAK;
return VK_FALSE;
}
extern rt_cvar r_VkPreferredSwapchainImages;
extern rt_cvar r_VkPreferMailboxMode;
extern rt_cvar r_VkMaxPipelineCount;
void RT_RENDERER_API_FN(RegisterCVars)(void) {
rtRegisterCVAR(&r_VkEnableAPIAllocTracking);
rtRegisterCVAR(&r_VkPhysDeviceName);
rtRegisterCVAR(&r_VkPreferredSwapchainImages);
rtRegisterCVAR(&r_VkPreferMailboxMode);
rtRegisterCVAR(&r_VkMaxFramesInFlight);
rtRegisterCVAR(&r_VkMaxPipelineCount);
}
static rt_result CreateInstance(void) {
VkResult result = volkInitialize();
if (result != VK_SUCCESS) {
rtReportError("vk", "Initialization failed: volkInitialize()");
return 1;
}
VkApplicationInfo app_info = {
.apiVersion = TARGET_API_VERSION,
.applicationVersion = 0x00001000,
.engineVersion = 0x00001000,
.pEngineName = "voyageEngine",
.pApplicationName = "Voyage",
};
const char *extensions[] = {
VK_KHR_SURFACE_EXTENSION_NAME,
#ifdef _WIN32
"VK_KHR_win32_surface",
#elif defined(RT_USE_XLIB)
"VK_KHR_xlib_surface",
#endif
#ifdef RT_DEBUG
VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
#endif
};
const char *layers[1];
unsigned int layer_count = 0;
#ifdef RT_DEBUG
/* Search for layers we want to enable */
uint32_t available_layer_count = 0;
result = vkEnumerateInstanceLayerProperties(&available_layer_count, NULL);
if (result == VK_SUCCESS) {
VkLayerProperties *props = calloc(available_layer_count, sizeof(VkLayerProperties));
if (props) {
vkEnumerateInstanceLayerProperties(&available_layer_count, props);
for (uint32_t i = 0; i < available_layer_count; ++i) {
if (strcmp(props[i].layerName, "VK_LAYER_KHRONOS_validation") == 0) {
layers[0] = "VK_LAYER_KHRONOS_validation";
layer_count = 1;
break;
}
}
free(props);
} else {
rtLog("vk", "Failed to allocate storage for instance layer properties.");
}
} else {
rtLog("vk", "vkEnumerateInstanceLayerProperties failed.");
}
#endif
VkInstanceCreateInfo instance_info = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pApplicationInfo = &app_info,
.ppEnabledExtensionNames = extensions,
.enabledExtensionCount = RT_ARRAY_COUNT(extensions),
.ppEnabledLayerNames = layers,
.enabledLayerCount = layer_count,
};
result = vkCreateInstance(&instance_info, g_gpu.alloc_cb, &g_gpu.instance);
if (result != VK_SUCCESS) {
rtReportError("vk", "Failed to create the vulkan instance.");
return 1;
}
volkLoadInstance(g_gpu.instance);
#ifdef RT_DEBUG
/* Create the debug utils messenger */
VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
.pfnUserCallback = DebugUtilsMessengerCb,
};
vkCreateDebugUtilsMessengerEXT(g_gpu.instance,
&messenger_info,
g_gpu.alloc_cb,
&g_gpu.messenger);
#endif
return RT_SUCCESS;
}
static rt_result CreateSurface(const rt_renderer_init_info *info) {
#ifdef _WIN32
g_gpu.native_window.hInstance = info->hInstance;
g_gpu.native_window.hWnd = info->hWnd;
VkWin32SurfaceCreateInfoKHR surface_info = {
.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
.hinstance = info->hInstance,
.hwnd = info->hWnd,
};
if (vkCreateWin32SurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
VK_SUCCESS)
return RT_SUCCESS;
else
return 100;
#elif defined(RT_USE_XLIB)
g_gpu.native_window.display = info->display;
g_gpu.native_window.window = info->window;
VkXlibSurfaceCreateInfoKHR surface_info = {
.sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
.dpy = info->display,
.window = info->window,
};
if (vkCreateXlibSurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) ==
VK_SUCCESS)
return RT_SUCCESS;
else
return 100;
#endif
}
typedef struct {
uint32_t graphics;
uint32_t compute;
uint32_t present;
uint32_t transfer;
} rt_queue_indices;
static rt_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) {
rt_queue_indices indices = {.graphics = UINT32_MAX,
.compute = UINT32_MAX,
.present = UINT32_MAX,
.transfer = UINT32_MAX};
uint32_t count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL);
VkQueueFamilyProperties *props = calloc(count, sizeof(VkQueueFamilyProperties));
if (!props) {
return indices;
}
vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, props);
for (uint32_t i = 0; i < count; ++i) {
if (props[i].queueCount == 0)
continue;
if ((props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
indices.graphics = i;
if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
indices.compute = i;
if ((props[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
indices.transfer = i;
VkBool32 present_supported = VK_FALSE;
vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported);
if (present_supported)
indices.present = i;
}
if (indices.transfer == UINT32_MAX && indices.graphics != UINT32_MAX)
indices.transfer = indices.graphics;
else if (indices.transfer == UINT32_MAX && indices.compute != UINT32_MAX)
indices.transfer = indices.compute;
free(props);
return indices;
}
static bool CheckDeviceExtensionSupported(VkPhysicalDevice phys_dev) {
const char *required_extensions[] = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
};
uint32_t extension_count;
vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, NULL);
VkExtensionProperties *supported_extensions =
calloc(extension_count, sizeof(VkExtensionProperties));
if (!supported_extensions)
return false;
vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, supported_extensions);
bool supported = true;
for (uint32_t i = 0; i < RT_ARRAY_COUNT(required_extensions); ++i) {
bool found = false;
for (uint32_t j = 0; j < extension_count; ++j) {
if (strncmp(supported_extensions[j].extensionName,
required_extensions[i],
VK_MAX_EXTENSION_NAME_SIZE) == 0) {
found = true;
break;
}
}
if (!found) {
supported = false;
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(phys_dev, &props);
rtLog("Device %s does not support the required extension %s",
props.deviceName,
required_extensions[i]);
goto out;
}
}
out:
free(supported_extensions);
return supported;
}
static rt_result ChoosePhysicalDevice(void) {
g_gpu.phys_device = VK_NULL_HANDLE;
uint32_t phys_device_count = 0;
VkResult result = vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, NULL);
if (result != VK_SUCCESS) {
rtReportError("vk", "Failed to enumerate the physical devices.");
return 2;
}
VkPhysicalDevice *phys_devices = calloc(phys_device_count, sizeof(VkPhysicalDevice));
if (!phys_devices) {
rtReportError("vk", "Failed to enumerate the physical devices: Out of memory.");
return 2;
}
vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, phys_devices);
uint32_t highscore = 0;
uint32_t best_index = phys_device_count;
for (uint32_t i = 0; i < phys_device_count; ++i) {
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
.pNext = &timeline_semaphore_features,
};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
.pNext = &synchronization2_features,
};
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
.pNext = &dynamic_rendering_features,
};
VkPhysicalDeviceFeatures2 features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &descriptor_indexing_features,
};
vkGetPhysicalDeviceFeatures2(phys_devices[i], &features);
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
.pNext = NULL,
};
VkPhysicalDeviceProperties2 props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &descriptor_indexing_props,
};
vkGetPhysicalDeviceProperties2(phys_devices[i], &props);
if (!CheckDeviceExtensionSupported(phys_devices[i]))
continue;
rt_queue_indices indices = RetrieveQueueIndices(phys_devices[i], g_gpu.surface);
if (indices.compute == UINT32_MAX || indices.present == UINT32_MAX ||
indices.graphics == UINT32_MAX)
continue;
if (!synchronization2_features.synchronization2 ||
!dynamic_rendering_features.dynamicRendering ||
!timeline_semaphore_features.timelineSemaphore)
continue;
/* Check for bindless support */
if (!descriptor_indexing_features.runtimeDescriptorArray ||
!descriptor_indexing_features.descriptorBindingPartiallyBound)
continue;
uint32_t score = 0;
if (props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
score += 100;
score += (props.properties.limits.maxFramebufferWidth / 100) *
(props.properties.limits.maxFramebufferHeight / 100);
score +=
(descriptor_indexing_props.shaderStorageBufferArrayNonUniformIndexingNative) ? 100 : 0;
score +=
(descriptor_indexing_props.shaderSampledImageArrayNonUniformIndexingNative) ? 100 : 0;
if (score > highscore) {
highscore = score;
best_index = i;
}
if (strncmp(props.properties.deviceName,
r_VkPhysDeviceName.s,
VK_MAX_PHYSICAL_DEVICE_NAME_SIZE) == 0) {
best_index = i;
break;
}
}
if (best_index < phys_device_count) {
g_gpu.phys_device = phys_devices[best_index];
VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES,
.pNext = NULL,
};
VkPhysicalDeviceProperties2 props = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &descriptor_indexing_props,
};
VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
};
VkPhysicalDeviceFeatures2 features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &descriptor_indexing_features,
};
vkGetPhysicalDeviceFeatures2(phys_devices[best_index], &features);
vkGetPhysicalDeviceProperties2(phys_devices[best_index], &props);
g_gpu.phys_device_props = props.properties;
g_gpu.descriptor_indexing_props = descriptor_indexing_props;
g_gpu.phys_device_features = features.features;
g_gpu.descriptor_indexing_features = descriptor_indexing_features;
}
free(phys_devices);
if (g_gpu.phys_device == VK_NULL_HANDLE) {
rtReportError("vk", "Failed to find a suitable physical device.");
return 3;
}
return RT_SUCCESS;
}
static rt_result CreateDevice(void) {
const char *extensions[] = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
};
rt_queue_indices queue_indices = RetrieveQueueIndices(g_gpu.phys_device, g_gpu.surface);
g_gpu.compute_family = queue_indices.compute;
g_gpu.graphics_family = queue_indices.graphics;
g_gpu.present_family = queue_indices.present;
g_gpu.transfer_family = queue_indices.transfer;
float priority = 1.f;
uint32_t distinct_queue_count = 1;
VkDeviceQueueCreateInfo queue_info[4];
queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[0].pNext = NULL;
queue_info[0].flags = 0;
queue_info[0].queueCount = 1;
queue_info[0].queueFamilyIndex = queue_indices.graphics;
queue_info[0].pQueuePriorities = &priority;
if (queue_indices.compute != queue_indices.graphics) {
queue_info[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[1].pNext = NULL;
queue_info[1].flags = 0;
queue_info[1].queueCount = 1;
queue_info[1].queueFamilyIndex = queue_indices.compute;
queue_info[1].pQueuePriorities = &priority;
++distinct_queue_count;
}
if (queue_indices.present != queue_indices.graphics &&
queue_indices.present != queue_indices.compute) {
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[distinct_queue_count].pNext = NULL;
queue_info[distinct_queue_count].flags = 0;
queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present;
queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
}
if (queue_indices.transfer != queue_indices.graphics &&
queue_indices.transfer != queue_indices.compute) {
queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info[distinct_queue_count].pNext = NULL;
queue_info[distinct_queue_count].flags = 0;
queue_info[distinct_queue_count].queueCount = 1;
queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.transfer;
queue_info[distinct_queue_count].pQueuePriorities = &priority;
++distinct_queue_count;
}
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
VkPhysicalDeviceSynchronization2Features synchronization2_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES,
.pNext = &timeline_semaphore_features,
};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
.pNext = &synchronization2_features,
};
VkPhysicalDeviceDescriptorIndexingFeatures indexing_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES,
.pNext = &dynamic_rendering_features,
};
VkPhysicalDeviceFeatures2 features = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &indexing_features};
vkGetPhysicalDeviceFeatures2(g_gpu.phys_device, &features);
RT_ASSERT(indexing_features.runtimeDescriptorArray &&
indexing_features.descriptorBindingPartiallyBound,
"We require a device that supports bindless vulkan.");
VkDeviceCreateInfo device_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = &features,
.enabledExtensionCount = RT_ARRAY_COUNT(extensions),
.ppEnabledExtensionNames = extensions,
.pQueueCreateInfos = queue_info,
.queueCreateInfoCount = distinct_queue_count,
};
if (vkCreateDevice(g_gpu.phys_device, &device_info, g_gpu.alloc_cb, &g_gpu.device) !=
VK_SUCCESS) {
rtReportError("vk", "Device creation failed.");
return 10;
}
vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue);
vkGetDeviceQueue(g_gpu.device, queue_indices.transfer, 0, &g_gpu.transfer_queue);
return RT_SUCCESS;
}
static rt_result CreateAllocator(void) {
#define SET_FNC(name) fncs.name = name
#define SET_KHR_FNC(name) (fncs).name##KHR = name
VmaVulkanFunctions fncs = {NULL};
SET_FNC(vkGetInstanceProcAddr);
SET_FNC(vkGetDeviceProcAddr);
SET_FNC(vkGetPhysicalDeviceProperties);
SET_FNC(vkGetPhysicalDeviceMemoryProperties);
SET_FNC(vkAllocateMemory);
SET_FNC(vkFreeMemory);
SET_FNC(vkMapMemory);
SET_FNC(vkUnmapMemory);
SET_FNC(vkFlushMappedMemoryRanges);
SET_FNC(vkInvalidateMappedMemoryRanges);
SET_FNC(vkBindBufferMemory);
SET_FNC(vkBindImageMemory);
SET_FNC(vkGetBufferMemoryRequirements);
SET_FNC(vkGetImageMemoryRequirements);
SET_FNC(vkCreateBuffer);
SET_FNC(vkDestroyBuffer);
SET_FNC(vkCreateImage);
SET_FNC(vkDestroyImage);
SET_FNC(vkCmdCopyBuffer);
SET_KHR_FNC(vkGetBufferMemoryRequirements2);
SET_KHR_FNC(vkGetImageMemoryRequirements2);
SET_KHR_FNC(vkBindBufferMemory2);
SET_KHR_FNC(vkBindImageMemory2);
SET_KHR_FNC(vkGetPhysicalDeviceMemoryProperties2);
SET_FNC(vkGetDeviceBufferMemoryRequirements);
SET_FNC(vkGetDeviceImageMemoryRequirements);
#undef SET_FNC
#undef SET_KHR_FNC
VmaAllocatorCreateInfo allocator_info = {
.instance = g_gpu.instance,
.physicalDevice = g_gpu.phys_device,
.device = g_gpu.device,
.pAllocationCallbacks = g_gpu.alloc_cb,
.vulkanApiVersion = TARGET_API_VERSION,
.pVulkanFunctions = &fncs,
};
return vmaCreateAllocator(&allocator_info, &g_gpu.allocator) == VK_SUCCESS ? RT_SUCCESS
: RT_UNKNOWN_ERROR;
}
static void DestroyAllocator(void) {
vmaDestroyAllocator(g_gpu.allocator);
}
static rt_result CreatePerFrameObjects(void) {
for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].render_finished) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].image_available) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
if (vkCreateSemaphore(g_gpu.device,
&semaphore_info,
g_gpu.alloc_cb,
&g_gpu.frames[i].swapchain_transitioned) != VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
#ifdef RT_DEBUG
char name[128];
rtSPrint(name, 128, "Render Finished Semaphore (%u)", i);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)g_gpu.frames[i].render_finished,
.objectType = VK_OBJECT_TYPE_SEMAPHORE,
.pObjectName = name,
};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 128, "Image Available Semaphore (%u)", i);
name_info.objectHandle = (uint64_t)g_gpu.frames[i].image_available;
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 128, "Swapchain Transitioned Semaphore (%u)", i);
name_info.objectHandle = (uint64_t)g_gpu.frames[i].swapchain_transitioned;
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
}
return RT_SUCCESS;
}
void DestroyPerFrameObjects(void) {
for (unsigned int i = 0; i < g_gpu.max_frames_in_flight; ++i) {
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].image_available, g_gpu.alloc_cb);
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].render_finished, g_gpu.alloc_cb);
vkDestroySemaphore(g_gpu.device, g_gpu.frames[i].swapchain_transitioned, g_gpu.alloc_cb);
}
}
extern rt_result InitPipelineManagement(void);
extern void ShutdownPipelineManagement(void);
extern rt_result InitRenderTargetManagement(void);
extern void ShutdownRenderTargetManagement(void);
extern rt_result InitCommandBufferManagement(void);
extern void ShutdownCommandBufferManagement(void);
extern rt_result InitializeSemaphoreManagement(void);
extern void ShutdownSemaphoreManagement(void);
extern rt_result InitBufferManagement(void);
extern void ShutdownBufferManagement(void);
extern rt_result InitializeTransfers(void);
extern void ShutdownTransfers(void);
rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
rtLog("vk", "Init");
_tracking_alloc_cbs.pUserData = NULL;
_tracking_alloc_cbs.pfnAllocation = TrackAllocation;
_tracking_alloc_cbs.pfnReallocation = TrackReallocation;
_tracking_alloc_cbs.pfnFree = TrackFree;
if (r_VkEnableAPIAllocTracking.i) {
g_gpu.alloc_cb = &_tracking_alloc_cbs;
} else {
g_gpu.alloc_cb = NULL;
}
g_gpu.max_frames_in_flight = RT_RESTRICT_VALUE_TO_BOUNDS(r_VkMaxFramesInFlight.i,
RT_VK_MIN_SUPPORTED_FRAMES_IN_FLIGHT,
RT_VK_MAX_SUPPORTED_FRAMES_IN_FLIGHT);
int res = CreateInstance();
if (res != RT_SUCCESS)
return res;
res = CreateSurface(info);
if (res != RT_SUCCESS)
return res;
res = ChoosePhysicalDevice();
if (res != RT_SUCCESS)
return res;
res = CreateDevice();
if (res != RT_SUCCESS)
return res;
res = CreateAllocator();
if (res != RT_SUCCESS)
return res;
res = CreatePerFrameObjects();
if (res != RT_SUCCESS)
return res;
res = InitPipelineManagement();
if (res != RT_SUCCESS)
return res;
res = InitRenderTargetManagement();
if (res != RT_SUCCESS)
return res;
res = InitializeSemaphoreManagement();
if (res != RT_SUCCESS)
return res;
res = InitCommandBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitializeTransfers();
if (res != RT_SUCCESS)
return res;
res = rtCreateSwapchain();
if (res != RT_SUCCESS)
return res;
rtUpdateSwapchainRenderTarget();
return RT_SUCCESS;
}
void RT_RENDERER_API_FN(Shutdown)(void) {
rtLog("vk", "Shutdown");
vkDeviceWaitIdle(g_gpu.device);
rtDestroySwapchain();
ShutdownTransfers();
ShutdownBufferManagement();
ShutdownCommandBufferManagement();
ShutdownSemaphoreManagement();
ShutdownRenderTargetManagement();
ShutdownPipelineManagement();
DestroyPerFrameObjects();
DestroyAllocator();
vkDestroyDevice(g_gpu.device, g_gpu.alloc_cb);
vkDestroySurfaceKHR(g_gpu.instance, g_gpu.surface, g_gpu.alloc_cb);
#ifdef RT_DEBUG
vkDestroyDebugUtilsMessengerEXT(g_gpu.instance, g_gpu.messenger, g_gpu.alloc_cb);
#endif
vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb);
}
unsigned int RT_RENDERER_API_FN(GetMaxFramesInFlight)(void) {
return g_gpu.max_frames_in_flight;
}

View File

@ -1,56 +0,0 @@
if vk_dep.found()
platform_defs = []
if get_option('use_xlib')
platform_defs = ['-DVK_USE_PLATFORM_XLIB_KHR']
elif host_machine.system() == 'windows'
platform_defs = ['-DVK_USE_PLATFORM_WIN32_KHR']
endif
vk_inc_dep = vk_dep.partial_dependency(compile_args : true, includes : true)
vk_renderer_lib = library('rtvk',
# Project Sources
'command_buffers.h',
'gpu.h',
'gpu_sync.h',
'pipelines.h',
'render_targets.h',
'swapchain.h',
'transfers.h',
'../common/common_render_graph.h',
'buffers.c',
'command_buffers.c',
'commands.c',
'frame.c',
'gpu_sync.c',
'helper.c',
'init.c',
'pipelines.c',
'render_graph.c',
'render_targets.c',
'swapchain.c',
'transfers.c',
'simple_sync_impl.cpp',
'../common/common_render_graph.c',
# Contrib Sources
'../../../contrib/volk/volk.h',
'../../../contrib/volk/volk.c',
'../../../contrib/vma/vk_mem_alloc.h',
'vma_impl.cpp',
dependencies : [m_dep, vk_inc_dep, windowing_dep],
include_directories : [engine_incdir, contrib_incdir],
link_with : [runtime_lib],
c_pch : 'pch/vk_pch.h',
c_args : platform_defs,
cpp_pch : 'pch/vk_pch.hpp',
cpp_args : platform_defs,
install : true)
engine_libs += vk_renderer_lib
engine_lib_paths += vk_renderer_lib.full_path()
endif

View File

@ -1,22 +0,0 @@
#include <volk/volk.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#endif
#if defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif
#include <stdlib.h>
#include <string.h>
/* GFX */
#include "gfx/gfx.h"
/* Commonly used runtime headers */
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h"
#include "runtime/threading.h"

View File

@ -1,3 +0,0 @@
extern "C" {
#include "vk_pch.h"
}

View File

@ -1,186 +0,0 @@
#include "runtime/config.h"
#include "runtime/handles.h"
#include "runtime/mem_arena.h"
#include "runtime/resources.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include "gfx/effect.h"
#include "gpu.h"
#include "pipelines.h"
#include <stdlib.h>
#include <volk/volk.h>
RT_CVAR_I(r_VkMaxPipelineCount, "Maximum number of pipeline objects. Default: 1024", 1024);
typedef struct rt_pipeline_s {
uint32_t version;
rt_pipeline pipeline;
struct rt_pipeline_s *next_free;
} rt_pipeline_slot;
static rt_pipeline_slot *_pipelines;
static rt_pipeline_slot *_first_free;
static rt_rwlock _lock;
static void DestroyPipeline(rt_pipeline_slot *slot) {
if (slot->pipeline.pipeline) {
vkDestroyPipeline(g_gpu.device, slot->pipeline.pipeline, g_gpu.alloc_cb);
}
slot->next_free = _first_free;
_first_free = slot;
}
static VkShaderModule CreateShaderModuleFromResource(rt_resource_id rid) {
if (rid == RT_INVALID_RESOURCE_ID)
return VK_NULL_HANDLE;
rt_resource *resource = NULL;
size_t size = rtGetResourceSize(rid);
if (size == 0)
return VK_NULL_HANDLE;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return VK_NULL_HANDLE;
VkShaderModule module = VK_NULL_HANDLE;
resource = rtArenaPush(temp.arena, size);
if (!resource) {
rtLog("VK", "Failed to allocate temporary memory for retrieving a shader resource");
goto out;
}
if (rtGetResource(rid, resource) != RT_SUCCESS) {
goto out;
}
if (resource->type != RT_RESOURCE_SHADER) {
rtLog("VK", "Attempted to create a shader module from a non-shader resource %llx", rid);
goto out;
}
rt_shader_info *info = resource->data;
if (!info) {
rtLog("VK", "Shader resource %llx has no attached shader_info", rid);
goto out;
}
VkShaderModuleCreateInfo module_info = {.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pCode = rtResolveRelptr(&info->bytecode),
.codeSize = info->bytecode_length};
if (vkCreateShaderModule(g_gpu.device, &module_info, g_gpu.alloc_cb, &module) != VK_SUCCESS) {
rtLog("VK", "Failed to create the shader module from resource %llx", rid);
goto out;
}
out:
rtReturnTemporaryArena(temp);
return module;
}
static bool CreateComputePipeline(VkShaderModule compute_shader,
const rt_pipeline_info *info,
rt_pipeline_slot *slot) {
return false;
}
static bool CreateGraphicsPipeline(VkShaderModule vertex_shader,
VkShaderModule fragment_shader,
const rt_pipeline_info *info,
rt_pipeline_slot *slot) {
return false;
}
rt_result InitPipelineManagement(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_lock = lock_res.lock;
_pipelines = calloc(r_VkMaxPipelineCount.i, sizeof(rt_pipeline_slot));
if (!_pipelines) {
rtDestroyRWLock(&_lock);
return RT_OUT_OF_MEMORY;
}
/* Keep [0] unused to preserve 0 as the invalid handle */
_first_free = &_pipelines[1];
for (int i = 1; i < r_VkMaxPipelineCount.i - 1; ++i) {
_pipelines[i].next_free = &_pipelines[i + 1];
}
return RT_SUCCESS;
}
void ShutdownPipelineManagement(void) {
for (int i = 1; i < r_VkMaxPipelineCount.i; ++i) {
DestroyPipeline(&_pipelines[i]);
}
free(_pipelines);
rtDestroyRWLock(&_lock);
_first_free = NULL;
}
rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *info) {
rt_pipeline_handle handle = RT_INVALID_HANDLE;
rtLockWrite(&_lock);
if (!_first_free) {
rtLog("VK", "No free pipeline slots!");
rtUnlockWrite(&_lock);
return handle;
}
rt_pipeline_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
/* No other thread that calls compile gets the same slot.
* Another thread accessing the slot via GetPipeline would get a version mismatch.
* The same holds for DestroyPipeline
*/
rtUnlockWrite(&_lock);
VkShaderModule vertex_shader = CreateShaderModuleFromResource(info->vertex_shader);
VkShaderModule fragment_shader = CreateShaderModuleFromResource(info->fragment_shader);
VkShaderModule compute_shader = CreateShaderModuleFromResource(info->compute_shader);
RT_UNUSED(vertex_shader);
RT_UNUSED(fragment_shader);
RT_UNUSED(compute_shader);
bool create_success = false;
if (compute_shader) {
create_success = CreateComputePipeline(compute_shader, info, slot);
} else if (vertex_shader && fragment_shader) {
create_success = CreateGraphicsPipeline(vertex_shader, fragment_shader, info, slot);
} else {
rtLog("VK", "Invalid combination of shaders in pipeline info.");
}
if (create_success) {
handle.version = slot->version;
handle.index = (uint32_t)(slot - _pipelines);
}
return handle;
}
void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
return;
rtLockWrite(&_lock);
if (_pipelines[handle.index].version == handle.version)
DestroyPipeline(&_pipelines[handle.index]);
else
rtLog("VK", "Tried to destroy a pipeline using an outdated handle.");
rtUnlockWrite(&_lock);
}
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxPipelineCount.i)
return NULL;
rtLockRead(&_lock);
rt_pipeline *res = NULL;
if (_pipelines[handle.index].version == handle.version)
res = &_pipelines[handle.index].pipeline;
else
rtLog("VK", "Tried to access a pipeline using an outdated handle.");
rtUnlockRead(&_lock);
return res;
}

View File

@ -1,15 +0,0 @@
#ifndef RT_VK_PIPELINES_H
#define RT_VK_PIPELINES_H
#include <volk/volk.h>
#include "gfx/renderer_api.h"
typedef struct {
VkPipeline pipeline;
} rt_pipeline;
/* A pipeline is immutable after creation. */
const rt_pipeline *rtGetPipeline(rt_pipeline_handle handle);
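/* Illustrative usage sketch (not part of the original header): looking up a pipeline
 * by handle before recording. An outdated handle makes rtGetPipeline return NULL, so
 * the bind is skipped; the graphics bind point is an assumption for this example. */
static inline void rt_example_bind_pipeline(VkCommandBuffer cmd, rt_pipeline_handle handle) {
    const rt_pipeline *pipeline = rtGetPipeline(handle);
    if (pipeline && pipeline->pipeline != VK_NULL_HANDLE)
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline);
}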
#endif

View File

@ -1,27 +0,0 @@
#include "gpu.h"
#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"
#include "../common/common_render_graph.h"
#include "render_targets.h"
static int RequireExplicitSynchronization(void) {
return 1;
}
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = rtCreateRenderTarget,
.RequireExplicitSynchronization =
RequireExplicitSynchronization};
return rtCreateRenderGraphBuilder(&cbs);
}
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
RT_NOT_IMPLEMENTED;
return RT_UNKNOWN_ERROR;
}

View File

@ -1,400 +0,0 @@
#include "runtime/config.h"
#include "runtime/threading.h"
#include "gfx/renderer_api.h"
#include "gpu.h"
#include "render_targets.h"
#include "swapchain.h"
#include <stdlib.h>
#include <volk/volk.h>
RT_CVAR_I(r_VkMaxRenderTargetCount, "Maximum number of render target objects. Default: 1024", 1024);
typedef struct rt_render_target_slot_s {
uint32_t version;
rt_render_target render_target;
struct rt_render_target_slot_s *next_free;
} rt_render_target_slot;
static rt_render_target_slot *_render_targets;
static rt_render_target_slot *_first_free;
static rt_rwlock _lock;
static rt_render_target_handle _swapchain_handle;
static void DestroyRenderTarget(rt_render_target_slot *slot) {
for (unsigned int i = 0; i < slot->render_target.image_count; ++i) {
vkDestroyImageView(g_gpu.device, slot->render_target.view[i], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
slot->render_target.image[i],
slot->render_target.allocation[i]);
}
slot->next_free = _first_free;
_first_free = slot;
}
static bool CreateImageAndView(VkExtent2D extent,
VkFormat format,
VkSampleCountFlagBits sample_count,
VkImageUsageFlagBits usage,
VkImageAspectFlagBits aspect,
VkImage *p_image,
VmaAllocation *p_allocation,
VkImageView *p_view,
const char *rt_name,
uint32_t image_index) {
uint32_t queue_families[3];
uint32_t distinct_queue_families = 1;
queue_families[0] = g_gpu.graphics_family;
if (g_gpu.compute_family != g_gpu.graphics_family)
queue_families[distinct_queue_families++] = g_gpu.compute_family;
if (g_gpu.present_family != g_gpu.graphics_family &&
g_gpu.present_family != g_gpu.compute_family)
queue_families[distinct_queue_families++] = g_gpu.present_family;
VkFormatProperties2 props = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
};
vkGetPhysicalDeviceFormatProperties2(g_gpu.phys_device, format, &props);
if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) == 0) {
rtLog("vk",
"Requested render target format %s can not be sampled.",
rtVkFormatToString(format));
usage &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
}
if ((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) == 0) {
rtLog("vk",
"Requested render target format %s can not be used for storage.",
rtVkFormatToString(format));
usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
}
if (((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0) &&
((props.formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) ==
0)) {
rtReportError(
"vk",
"Tried to create a render target color attachment, but the format %s does not "
"support the color attachment usage.",
rtVkFormatToString(format));
return false;
} else if (((usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0) &&
((props.formatProperties.optimalTilingFeatures &
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) == 0)) {
rtReportError("vk",
"Tried to create a render target depth/stencil attachment, but the format %s"
"does not support the depth/stencil attachment usage.",
rtVkFormatToString(format));
return false;
}
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = format,
.extent = {.width = extent.width, .height = extent.height, .depth = 1},
.mipLevels = 1,
.arrayLayers = 1,
.samples = sample_count,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = usage,
.sharingMode =
(distinct_queue_families > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
.pQueueFamilyIndices = (distinct_queue_families > 1) ? queue_families : NULL,
.queueFamilyIndexCount = distinct_queue_families,
};
VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_GPU_ONLY,
};
VkImage image;
VmaAllocation allocation;
if (vmaCreateImage(g_gpu.allocator, &image_info, &alloc_info, &image, &allocation, NULL) !=
VK_SUCCESS) {
return false;
}
VkImageViewCreateInfo view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = format,
.components = {.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY},
/* clang-format off */
.subresourceRange = {
.aspectMask = aspect,
.baseArrayLayer = 0,
.baseMipLevel = 0,
.layerCount = 1,
.levelCount = 1,
},
/* clang-format on */
};
VkImageView view;
if (vkCreateImageView(g_gpu.device, &view_info, g_gpu.alloc_cb, &view) != VK_SUCCESS) {
rtLog("VK", "Failed to create render target image view");
vmaDestroyImage(g_gpu.allocator, image, allocation);
return false;
}
#ifdef RT_DEBUG
char name[260];
rtSPrint(name, 260, "%s (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)image,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
rtSPrint(name, 260, "%s [view] (%u)", rt_name ? rt_name : "unnamed rendertarget", image_index);
name_info =
(VkDebugUtilsObjectNameInfoEXT){.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectHandle = (uint64_t)view,
.pObjectName = name,
.objectType = VK_OBJECT_TYPE_IMAGE_VIEW};
vkSetDebugUtilsObjectNameEXT(g_gpu.device, &name_info);
#endif
*p_image = image;
*p_allocation = allocation;
*p_view = view;
return true;
}
rt_result InitRenderTargetManagement(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok)
return RT_UNKNOWN_ERROR;
_lock = lock_res.lock;
_render_targets = calloc(r_VkMaxRenderTargetCount.i, sizeof(rt_render_target_slot));
if (!_render_targets) {
rtDestroyRWLock(&_lock);
return RT_OUT_OF_MEMORY;
}
/* Keep [0] unused to preserve 0 as the invalid handle */
_first_free = &_render_targets[1];
for (int i = 1; i < r_VkMaxRenderTargetCount.i - 1; ++i) {
_render_targets[i].next_free = &_render_targets[i + 1];
}
/* Reserve the slot for the swap chain rt */
rt_render_target_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
_swapchain_handle = (rt_render_target_handle){.version = slot->version,
.index = (uint32_t)(slot - _render_targets)};
return RT_SUCCESS;
}
void ShutdownRenderTargetManagement(void) {
for (int i = 1; i < r_VkMaxRenderTargetCount.i; ++i) {
DestroyRenderTarget(&_render_targets[i]);
}
free(_render_targets);
rtDestroyRWLock(&_lock);
_first_free = NULL;
}
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info) {
rt_render_target_handle handle = {0};
rtLockWrite(&_lock);
if (!_first_free) {
rtLog("VK", "No free render target slots!");
rtUnlockWrite(&_lock);
return handle;
}
rt_render_target_slot *slot = _first_free;
_first_free = slot->next_free;
slot->version = (slot->version + 1) & RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
/* No other thread that calls rtCreateRenderTarget gets the same slot.
 * Another thread accessing the slot via rtGetRenderTarget would get a version mismatch.
 * The same holds for rtDestroyRenderTarget.
 */
rtUnlockWrite(&_lock);
const char *name = info->name;
slot->render_target.match_swapchain = 0;
slot->render_target.image_count = g_swapchain.image_count;
for (unsigned int i = 0; i < g_swapchain.image_count; ++i) {
uint32_t width = info->width, height = info->height;
if (width == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
width = g_swapchain.extent.width;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
}
if (height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
height = g_swapchain.extent.height;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE;
}
slot->render_target.extent = (VkExtent2D){.width = width, .height = height};
if (info->format != RT_PIXEL_FORMAT_SWAPCHAIN)
slot->render_target.format = rtPixelFormatToVkFormat(info->format);
else {
slot->render_target.format = g_swapchain.format;
slot->render_target.match_swapchain |= RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT;
}
if (info->format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8 ||
info->format == RT_PIXEL_FORMAT_DEPTH32) {
slot->render_target.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
if (info->format == RT_PIXEL_FORMAT_DEPTH32)
slot->render_target.aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
else
slot->render_target.aspect =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
} else {
slot->render_target.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
slot->render_target.aspect = VK_IMAGE_ASPECT_COLOR_BIT;
}
slot->render_target.sample_count = rtSampleCountToFlags(info->samples);
if (!CreateImageAndView(slot->render_target.extent,
slot->render_target.format,
slot->render_target.sample_count,
slot->render_target.usage,
slot->render_target.aspect,
&slot->render_target.image[i],
&slot->render_target.allocation[i],
&slot->render_target.view[i],
name,
i)) {
slot->render_target.image_count = i;
DestroyRenderTarget(slot);
goto out;
}
slot->render_target.states[i] = RT_RENDER_TARGET_STATE_INVALID;
}
handle.version = slot->version;
handle.index = (uint32_t)(slot - _render_targets);
out:
return handle;
}
void rtDestroyRenderTarget(rt_render_target_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
return;
rtLockWrite(&_lock);
if (_render_targets[handle.index].version == handle.version)
DestroyRenderTarget(&_render_targets[handle.index]);
else
rtLog("VK", "Tried to destroy a render target using an outdated handle.");
rtUnlockWrite(&_lock);
}
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
return NULL;
rtLockRead(&_lock);
rt_render_target *res = NULL;
if (_render_targets[handle.index].version == handle.version)
res = &_render_targets[handle.index].render_target;
else
rtLog("VK", "Tried to access a render target using an outdated handle.");
rtUnlockRead(&_lock);
return res;
}
rt_render_target_handle rtGetSwapchainRenderTarget(void) {
return _swapchain_handle;
}
void rtUpdateSwapchainRenderTarget(void) {
RT_ASSERT(_swapchain_handle.index != 0, "Invalid swap chain render target!");
rt_render_target_slot *slot = &_render_targets[_swapchain_handle.index];
rt_render_target *rt = &slot->render_target;
rt->match_swapchain = 0;
rt->format = g_swapchain.format;
rt->extent = g_swapchain.extent;
rt->sample_count = 1;
rt->usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
rt->aspect = VK_IMAGE_ASPECT_COLOR_BIT;
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
rt->allocation[i] = NULL;
rt->image[i] = g_swapchain.images[i];
rt->view[i] = g_swapchain.image_views[i];
rt->states[i] = RT_RENDER_TARGET_STATE_INVALID;
}
}
void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent) {
rtLockWrite(&_lock);
for (uint32_t i = 1; i < (uint32_t)r_VkMaxRenderTargetCount.i; ++i) {
if (_render_targets[i].render_target.image_count == 0)
continue;
rt_render_target *render_target = &_render_targets[i].render_target;
if (render_target->match_swapchain != 0) {
for (uint32_t j = 0; j < render_target->image_count; ++j) {
vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
render_target->image[j],
render_target->allocation[j]);
}
if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT) != 0) {
render_target->format = format;
}
/* A render target may match both the swapchain format and its size, so check the
 * size flag independently rather than in an else branch. */
if ((render_target->match_swapchain & RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE) != 0) {
render_target->extent = extent;
}
for (uint32_t j = 0; j < image_count; ++j) {
if (!CreateImageAndView(render_target->extent,
render_target->format,
render_target->sample_count,
render_target->usage,
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to recreate swapchain-matching render target");
break;
}
}
} else if (render_target->image_count < image_count) {
/* Create additional images */
for (uint32_t j = render_target->image_count; j < image_count; ++j) {
if (!CreateImageAndView(render_target->extent,
render_target->format,
render_target->sample_count,
render_target->usage,
render_target->aspect,
&render_target->image[j],
&render_target->allocation[j],
&render_target->view[j],
NULL,
j)) {
render_target->image_count = j;
DestroyRenderTarget(&_render_targets[i]);
rtReportError("VK", "Failed to create additional render target images");
break;
}
}
} else if (render_target->image_count > image_count) {
/* Delete unnecessary images */
for (uint32_t j = image_count; j < render_target->image_count; ++j) {
vkDestroyImageView(g_gpu.device, render_target->view[j], g_gpu.alloc_cb);
vmaDestroyImage(g_gpu.allocator,
render_target->image[j],
render_target->allocation[j]);
}
}
render_target->image_count = image_count;
}
rtUnlockWrite(&_lock);
}

View File

@ -1,44 +0,0 @@
#ifndef RT_VK_RENDER_TARGETS_H
#define RT_VK_RENDER_TARGETS_H
#include "gpu.h"
#include "gfx/renderer_api.h"
#include "../common/common_render_graph.h"
/* Must match RT_VK_MAX_SWAPCHAIN_IMAGES */
#define RT_VK_RENDER_TARGET_MAX_IMAGES 3
typedef enum {
RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE = 0x01,
RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT = 0x02,
} rt_render_target_match_swapchain_flags;
typedef struct {
VkImage image[RT_VK_RENDER_TARGET_MAX_IMAGES];
VkImageView view[RT_VK_RENDER_TARGET_MAX_IMAGES];
VmaAllocation allocation[RT_VK_RENDER_TARGET_MAX_IMAGES];
rt_render_target_state states[RT_VK_RENDER_TARGET_MAX_IMAGES];
VkSampleCountFlagBits sample_count;
VkFormat format;
VkExtent2D extent;
VkImageUsageFlagBits usage;
VkImageAspectFlags aspect;
unsigned int image_count;
rt_render_target_match_swapchain_flags match_swapchain;
} rt_render_target;
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info);
void rtDestroyRenderTarget(rt_render_target_handle handle);
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
rt_render_target_handle rtGetSwapchainRenderTarget(void);
/* Update the render target that represents the swap chain */
void rtUpdateSwapchainRenderTarget(void);
/* Update render targets that match the swap chain */
void rtUpdateRenderTargetsFromSwapchain(uint32_t image_count, VkFormat format, VkExtent2D extent);
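/* Illustrative usage sketch (not part of the original header): requesting a color
 * target that tracks the swap chain, which sets the match_swapchain flags above.
 * The field names follow how render_targets.c reads rt_physical_render_target_info;
 * the "hdr_color" name is made up for this example. */
static inline rt_render_target_handle rt_example_create_swapchain_sized_target(void) {
    rt_physical_render_target_info info = {
        .name = "hdr_color",
        .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN,  /* -> RT_RENDER_TARGET_MATCH_SWAPCHAIN_SIZE */
        .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
        .format = RT_PIXEL_FORMAT_SWAPCHAIN,       /* -> RT_RENDER_TARGET_MATCH_SWAPCHAIN_FORMAT */
        .samples = 1,
    };
    return rtCreateRenderTarget(&info);
}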
#endif

View File

@ -1,80 +0,0 @@
#ifndef RT_VK_RESOURCES_H
#define RT_VK_RESOURCES_H
/* Buffers and images */
#include "gpu.h"
#include "runtime/threading.h"
typedef enum {
RT_BUFFER_STATE_INVALID,
RT_BUFFER_STATE_NOT_USED,
RT_BUFFER_STATE_IN_USE,
RT_BUFFER_STATE_IN_TRANSFER,
} rt_buffer_state;
typedef struct {
VkBuffer buffer;
VmaAllocation allocation;
size_t size;
rt_buffer_usage usage;
rt_buffer_type type;
rt_buffer_state state;
rt_rwlock lock;
bool mappable;
bool coherent;
rt_gpu_queue owner;
} rt_buffer;
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
/* Helper functions for accessing buffers */
RT_INLINE rt_gpu_queue rtGetBufferOwner(rt_buffer_handle handle) {
rt_buffer *buffer = rtGetBuffer(handle);
rt_gpu_queue owner = RT_VK_UNOWNED;
if (buffer) {
rtLockRead(&buffer->lock);
owner = buffer->owner;
rtUnlockRead(&buffer->lock);
}
return owner;
}
RT_INLINE void rtSetBufferOwner(rt_buffer_handle handle, rt_gpu_queue owner) {
rt_buffer *buffer = rtGetBuffer(handle);
if (buffer) {
rtLockWrite(&buffer->lock);
buffer->owner = owner;
rtUnlockWrite(&buffer->lock);
}
}
RT_INLINE rt_buffer_state rtGetBufferState(rt_buffer_handle handle) {
rt_buffer *buffer = rtGetBuffer(handle);
rt_buffer_state state = RT_BUFFER_STATE_INVALID;
if (buffer) {
rtLockRead(&buffer->lock);
state = buffer->state;
rtUnlockRead(&buffer->lock);
}
return state;
}
RT_INLINE void rtSetBufferState(rt_buffer_handle handle, rt_buffer_state state) {
rt_buffer *buffer = rtGetBuffer(handle);
if (buffer) {
rtLockWrite(&buffer->lock);
buffer->state = state;
rtUnlockWrite(&buffer->lock);
}
}
#endif
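
A hedged usage sketch for the lock-protected helpers above. It assumes the rtUploadToBuffer() declaration from transfers.h further down; UploadVertices is an illustrative name, and the state/owner bookkeeping shown is one plausible convention, not documented behaviour.

#include "resources.h"
#include "transfers.h"

/* Sketch: mark the buffer as in transfer while the staging copy is queued,
 * then record the transfer queue as the new owner on success. */
static rt_result UploadVertices(rt_buffer_handle vbo, const void *data, size_t size) {
    rt_buffer *buffer = rtGetBuffer(vbo);
    if (!buffer)
        return RT_INVALID_VALUE;
    rtSetBufferState(vbo, RT_BUFFER_STATE_IN_TRANSFER);
    rt_result res = rtUploadToBuffer(buffer->buffer,
                                     buffer->allocation,
                                     rtGetBufferOwner(vbo),
                                     data,
                                     size);
    if (res == RT_SUCCESS)
        rtSetBufferOwner(vbo, RT_TRANSFER_QUEUE);
    else
        rtSetBufferState(vbo, RT_BUFFER_STATE_NOT_USED);
    return res;
}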

View File

@ -1,6 +0,0 @@
#include "gpu.h"
extern "C" {
#define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION
#include "simple_vulkan_synchronization/thsvs_simpler_vulkan_synchronization.h"
}

View File

@ -1,205 +0,0 @@
#define RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
#include "swapchain.h"
#include "gpu.h"
#include "runtime/config.h"
#include <stdlib.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#elif defined(RT_USE_XLIB)
#include <X11/Xlib.h>
#endif
RT_CVAR_I(r_VkPreferredSwapchainImages,
"Preferred number of swapchain images. [2/3] Default: 2",
2);
RT_CVAR_I(r_VkPreferMailboxMode, "Prefer mailbox present mode over fifo mode. [0/1] Default: 1", 1);
typedef struct {
VkPresentModeKHR present_mode;
VkSurfaceFormatKHR surface_format;
VkExtent2D extent;
VkSurfaceTransformFlagsKHR pre_transform;
} rt_device_swapchain_parameters;
static rt_device_swapchain_parameters DetermineSwapchainParameters(void) {
rt_device_swapchain_parameters params;
/* determine presentation mode. FIFO should always be available.
* TODO: If vsync is enabled, we should always choose FIFO.
*/
params.present_mode = VK_PRESENT_MODE_FIFO_KHR;
if (r_VkPreferMailboxMode.i) {
VkPresentModeKHR modes[6];
uint32_t count = 6;
vkGetPhysicalDeviceSurfacePresentModesKHR(g_gpu.phys_device, g_gpu.surface, &count, modes);
for (uint32_t i = 0; i < count; ++i) {
if (modes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
params.present_mode = VK_PRESENT_MODE_MAILBOX_KHR;
}
}
/* Determine surface format */
VkSurfaceFormatKHR formats[64];
uint32_t format_count = 64;
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, NULL);
if (format_count > 64)
    format_count = 64; /* clamp to the size of the local formats array */
vkGetPhysicalDeviceSurfaceFormatsKHR(g_gpu.phys_device, g_gpu.surface, &format_count, formats);
params.surface_format = formats[0];
for (uint32_t i = 0; i < format_count; ++i) {
if (formats[i].format == VK_FORMAT_B8G8R8A8_SRGB &&
formats[i].colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
params.surface_format = formats[i];
break;
}
}
/* get extent */
VkSurfaceCapabilitiesKHR capabilities;
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(g_gpu.phys_device, g_gpu.surface, &capabilities);
if (capabilities.currentExtent.width != UINT32_MAX) {
params.extent = capabilities.currentExtent;
} else {
#ifdef _WIN32
RECT client_area;
GetClientRect(g_gpu.native_window.hWnd, &client_area);
params.extent.width = (uint32_t)client_area.right;
params.extent.height = (uint32_t)client_area.bottom;
#else
XWindowAttributes attribs;
XGetWindowAttributes(g_gpu.native_window.display, g_gpu.native_window.window, &attribs);
params.extent.width = (uint32_t)attribs.width;
params.extent.height = (uint32_t)attribs.height;
#endif
}
params.pre_transform = capabilities.currentTransform;
return params;
}
rt_swapchain g_swapchain;
rt_result rtCreateSwapchain(void) {
rt_device_swapchain_parameters device_params = DetermineSwapchainParameters();
uint32_t image_count = r_VkPreferredSwapchainImages.i;
if (image_count < 2)
image_count = 2;
else if (image_count > 3)
image_count = 3;
VkSwapchainCreateInfoKHR swapchain_info = {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.surface = g_gpu.surface,
.presentMode = device_params.present_mode,
.imageFormat = device_params.surface_format.format,
.imageColorSpace = device_params.surface_format.colorSpace,
.imageExtent = device_params.extent,
.preTransform = device_params.pre_transform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.clipped = VK_TRUE,
.minImageCount = image_count,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageArrayLayers = 1,
.oldSwapchain = VK_NULL_HANDLE,
};
uint32_t queue_families[] = {g_gpu.graphics_family, g_gpu.present_family};
if (g_gpu.present_family != g_gpu.graphics_family) {
swapchain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
swapchain_info.pQueueFamilyIndices = queue_families;
swapchain_info.queueFamilyIndexCount = 2;
} else {
swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
swapchain_info.pQueueFamilyIndices = NULL;
swapchain_info.queueFamilyIndexCount = 0;
}
if (vkCreateSwapchainKHR(g_gpu.device,
&swapchain_info,
g_gpu.alloc_cb,
&g_swapchain.swapchain) != VK_SUCCESS) {
rtReportError("vk", "Failed to create the swapchain");
return 50;
}
g_swapchain.format = device_params.surface_format.format;
g_swapchain.extent = device_params.extent;
/* Retrieve images */
g_swapchain.image_count = 0;
vkGetSwapchainImagesKHR(g_gpu.device, g_swapchain.swapchain, &g_swapchain.image_count, NULL);
if (g_swapchain.image_count > RT_VK_MAX_SWAPCHAIN_IMAGES) {
rtReportError("vk", "Unsupported number of swapchain images: %u", g_swapchain.image_count);
return 51;
}
vkGetSwapchainImagesKHR(g_gpu.device,
g_swapchain.swapchain,
&g_swapchain.image_count,
g_swapchain.images);
/* Create image views */
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
VkImageViewCreateInfo view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = g_swapchain.images[i],
.format = g_swapchain.format,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = 0,
.layerCount = 1,
.baseMipLevel = 0,
.levelCount = 1,
},
};
if (vkCreateImageView(g_gpu.device,
&view_info,
g_gpu.alloc_cb,
&g_swapchain.image_views[i]) != VK_SUCCESS) {
rtReportError("vk", "Failed to create an image view for the swapchain.");
return 52;
}
}
/* Create fences */
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
/* Create as signalled so that we can wait on it the first time we render to that
swapchain image. */
.flags = VK_FENCE_CREATE_SIGNALED_BIT,
};
if (vkCreateFence(g_gpu.device,
&fence_info,
g_gpu.alloc_cb,
&g_swapchain.image_fences[i]) != VK_SUCCESS) {
rtReportError("vk", "Failed to create a fence for the swapchain");
return 53;
}
}
return RT_SUCCESS;
}
rt_result rtRecreateSwapchain(void) {
/* TODO(Kevin): Old swapchain in swapchain create info */
rtDestroySwapchain();
return rtCreateSwapchain();
}
void rtDestroySwapchain(void) {
for (uint32_t i = 0; i < g_swapchain.image_count; ++i) {
vkDestroyFence(g_gpu.device, g_swapchain.image_fences[i], g_gpu.alloc_cb);
vkDestroyImageView(g_gpu.device, g_swapchain.image_views[i], g_gpu.alloc_cb);
}
vkDestroySwapchainKHR(g_gpu.device, g_swapchain.swapchain, g_gpu.alloc_cb);
}
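
The per-image fences above are created signalled so that the very first wait on them succeeds immediately. A sketch of how they are typically consumed around image acquisition follows; BeginFrame and image_available are illustrative names, not part of the codebase.

/* Sketch: acquire an image, wait for the frame that last rendered to it,
 * then re-arm its fence for the submit that will render to it now. */
static uint32_t BeginFrame(VkSemaphore image_available) {
    uint32_t image_index = 0;
    vkAcquireNextImageKHR(g_gpu.device,
                          g_swapchain.swapchain,
                          UINT64_MAX,
                          image_available,
                          VK_NULL_HANDLE,
                          &image_index);
    vkWaitForFences(g_gpu.device, 1, &g_swapchain.image_fences[image_index], VK_TRUE, UINT64_MAX);
    vkResetFences(g_gpu.device, 1, &g_swapchain.image_fences[image_index]);
    return image_index;
}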

View File

@ -1,30 +0,0 @@
#ifndef RT_VK_SWAPCHAIN_H
#define RT_VK_SWAPCHAIN_H
#include <volk/volk.h>
#include "runtime/runtime.h"
#define RT_VK_MAX_SWAPCHAIN_IMAGES 3
typedef struct {
VkSwapchainKHR swapchain;
VkImage images[RT_VK_MAX_SWAPCHAIN_IMAGES];
VkImageView image_views[RT_VK_MAX_SWAPCHAIN_IMAGES];
VkFence image_fences[RT_VK_MAX_SWAPCHAIN_IMAGES];
uint32_t image_count;
VkFormat format;
VkExtent2D extent;
} rt_swapchain;
#ifndef RT_VK_DONT_DEFINE_SWAPCHAIN_GLOBAL
extern rt_swapchain g_swapchain;
#endif
rt_result rtCreateSwapchain(void);
rt_result rtRecreateSwapchain(void);
void rtDestroySwapchain(void);
#endif

View File

@ -1,263 +0,0 @@
#include "transfers.h"
#include "command_buffers.h"
#include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <stdbool.h>
RT_CVAR_I(rt_VkTransferSlotCount,
"Number of available transfer slots per frame. Default: 512",
512);
/* This is a temporary solution. We probably should keep a pool of buffers
* to avoid re-creating the buffers all the time. */
typedef struct {
VkBuffer buffer;
VmaAllocation allocation;
bool requires_flush;
} rt_transfer_buffer;
typedef struct {
rt_transfer_buffer tbuf;
VkFence fence;
VkSemaphore ownership_transfer;
} rt_transfer;
static rt_transfer *_transfers;
static uint32_t _transfer_count;
static rt_mutex *_transfer_lock;
static rt_transfer_buffer AcquireTransferBuffer(size_t size) {
rt_transfer_buffer tbuf = {VK_NULL_HANDLE};
VkBufferCreateInfo buffer_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO,
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
};
if (vmaCreateBuffer(g_gpu.allocator,
&buffer_info,
&alloc_info,
&tbuf.buffer,
&tbuf.allocation,
NULL) == VK_SUCCESS) {
VkMemoryPropertyFlags props;
vmaGetAllocationMemoryProperties(g_gpu.allocator, tbuf.allocation, &props);
tbuf.requires_flush = (props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0;
}
return tbuf;
}
static void ReturnTransferBuffer(rt_transfer_buffer buffer) {
vmaDestroyBuffer(g_gpu.allocator, buffer.buffer, buffer.allocation);
}
static void CopyToTransferBuffer(rt_transfer_buffer buffer, const void *data, size_t n) {
void *tmem = NULL;
vmaMapMemory(g_gpu.allocator, buffer.allocation, &tmem);
RT_ASSERT(tmem, "Transfer Buffer memory must be mappable.");
memcpy(tmem, data, n);
vmaUnmapMemory(g_gpu.allocator, buffer.allocation);
if (buffer.requires_flush)
vmaFlushAllocation(g_gpu.allocator, buffer.allocation, 0, n);
}
rt_result InitializeTransfers(void) {
_transfer_lock = rtCreateMutex();
if (!_transfer_lock)
return RT_UNKNOWN_ERROR;
_transfers = calloc((size_t)rt_VkTransferSlotCount.i, sizeof(rt_transfer));
if (!_transfers) {
rtDestroyMutex(_transfer_lock);
return RT_OUT_OF_MEMORY;
}
_transfer_count = 0;
return RT_SUCCESS;
}
void ShutdownTransfers(void) {
rtDestroyMutex(_transfer_lock);
for (int i = 0; i < rt_VkTransferSlotCount.i; ++i) {
    if (_transfers[i].fence)
        vkDestroyFence(g_gpu.device, _transfers[i].fence, g_gpu.alloc_cb);
    if (_transfers[i].ownership_transfer)
        vkDestroySemaphore(g_gpu.device, _transfers[i].ownership_transfer, g_gpu.alloc_cb);
}
free(_transfers);
}
#define TRANSFER_FAILED -1
#define TRANSFER_NOT_NEEDED 0
#define TRANSFER_STARTED 1
static int AcquireBufferOwnership(rt_transfer *transfer,
VkBuffer buffer,
rt_gpu_queue current_owner,
VkCommandBuffer transfer_cmd) {
if (!transfer->ownership_transfer) {
VkSemaphoreCreateInfo sem_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
if (vkCreateSemaphore(g_gpu.device,
&sem_info,
g_gpu.alloc_cb,
&transfer->ownership_transfer) != VK_SUCCESS) {
rtReportError("vk", "Failed to create an ownership transfer semaphore.");
return TRANSFER_FAILED;
}
}
uint32_t src_family = rtGetQueueFamily(current_owner);
uint32_t dst_family = rtGetQueueFamily(RT_TRANSFER_QUEUE);
if (src_family == dst_family)
return TRANSFER_NOT_NEEDED;
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(current_owner);
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
VkBufferMemoryBarrier2 release_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
.buffer = buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
.srcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
.srcAccessMask = 0,
.srcQueueFamilyIndex = src_family,
.dstQueueFamilyIndex = dst_family,
};
VkDependencyInfo dep = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = &release_barrier,
.bufferMemoryBarrierCount = 1};
vkCmdPipelineBarrier2(cmd, &dep);
vkEndCommandBuffer(cmd);
VkBufferMemoryBarrier2 acquire_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
.buffer = buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.srcQueueFamilyIndex = src_family,
.dstQueueFamilyIndex = dst_family,
};
VkDependencyInfo dep2 = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = &acquire_barrier,
.bufferMemoryBarrierCount = 1};
vkCmdPipelineBarrier2(transfer_cmd, &dep2);
/* Only transfer the ownership when the frame is finished */
VkSemaphore wait_semaphore = VK_NULL_HANDLE;
rt_frame_data *frame = rtGetFrameData(g_gpu.current_frame_id);
wait_semaphore = frame->render_finished;
uint32_t dummy = 0;
if (rtSubmitSingleCommandBuffer(cmd,
&wait_semaphore,
&dummy,
1,
&transfer->ownership_transfer,
&dummy,
1,
current_owner,
VK_NULL_HANDLE) != RT_SUCCESS)
return TRANSFER_FAILED;
return TRANSFER_STARTED;
}
rt_result rtUploadToBuffer(VkBuffer buffer,
VmaAllocation allocation,
rt_gpu_queue current_owner,
const void *data,
size_t nbytes) {
rtLockMutex(_transfer_lock);
rt_transfer *transfer =
(int)_transfer_count < rt_VkTransferSlotCount.i ? &_transfers[_transfer_count++] : NULL;
rtUnlockMutex(_transfer_lock);
if (!transfer)
return RT_NO_TRANSFER_SLOTS;
transfer->tbuf = AcquireTransferBuffer(nbytes);
if (!transfer->tbuf.buffer) {
return RT_OUT_OF_MEMORY;
}
CopyToTransferBuffer(transfer->tbuf, data, nbytes);
if (!transfer->fence) {
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
};
if (vkCreateFence(g_gpu.device, &fence_info, g_gpu.alloc_cb, &transfer->fence) !=
VK_SUCCESS) {
return RT_UNKNOWN_ERROR;
}
}
VkCommandBuffer cmd = rtAllocSingleCommandBuffer(RT_TRANSFER_QUEUE);
VkCommandBufferBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkBeginCommandBuffer(cmd, &begin_info);
bool requires_ownership_transfer =
(current_owner != RT_TRANSFER_QUEUE && current_owner != RT_VK_UNOWNED);
if (requires_ownership_transfer) {
int did_transfer = AcquireBufferOwnership(transfer, buffer, current_owner, cmd);
if (did_transfer == TRANSFER_FAILED)
return RT_UNKNOWN_ERROR;
else if (did_transfer == TRANSFER_NOT_NEEDED)
requires_ownership_transfer = false;
}
VkBufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = nbytes};
vkCmdCopyBuffer(cmd, transfer->tbuf.buffer, buffer, 1, &region);
vkEndCommandBuffer(cmd);
uint32_t dummy = 0;
return rtSubmitSingleCommandBuffer(cmd,
requires_ownership_transfer ? &transfer->ownership_transfer
: NULL,
requires_ownership_transfer ? &dummy : NULL,
requires_ownership_transfer ? 1 : 0,
NULL,
NULL,
0,
RT_TRANSFER_QUEUE,
transfer->fence);
}
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void) {
if (_transfer_count == 0)
return;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
RT_ASSERT(temp.arena, "Could not get a temporary arena for flushing gpu transfers.");
rtLockMutex(_transfer_lock);
VkFence *fences = RT_ARENA_PUSH_ARRAY(temp.arena, VkFence, _transfer_count);
if (!fences) {
rtReportError("vk", "Failed to allocate fences array for flushing gpu transfers.");
rtUnlockMutex(_transfer_lock);
return;
}
uint32_t count = 0;
for (uint32_t i = 0; i < _transfer_count; ++i) {
if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
continue;
fences[count++] = _transfers[i].fence;
}
vkWaitForFences(g_gpu.device, count, fences, VK_TRUE, UINT64_MAX);
for (uint32_t i = 0; i < _transfer_count; ++i) {
if (!_transfers[i].fence || !_transfers[i].tbuf.buffer)
continue;
ReturnTransferBuffer(_transfers[i].tbuf);
}
_transfer_count = 0;
rtUnlockMutex(_transfer_lock);
}
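
The "pool of buffers" idea mentioned in the comment near the top of this file could look roughly like the sketch below. Every name here except rt_transfer_buffer and AcquireTransferBuffer is illustrative and does not exist in the codebase; this is one possible layout, not the author's design.

#define RT_TRANSFER_POOL_SIZE 64

typedef struct {
    rt_transfer_buffer buffer;
    size_t capacity;
    bool in_use;
} rt_pooled_transfer_buffer;

static rt_pooled_transfer_buffer _buffer_pool[RT_TRANSFER_POOL_SIZE];

/* Sketch: reuse the smallest free pooled buffer that is large enough instead of
 * creating and destroying a staging buffer for every upload. */
static rt_transfer_buffer AcquirePooledTransferBuffer(size_t size) {
    int best = -1;
    for (int i = 0; i < RT_TRANSFER_POOL_SIZE; ++i) {
        if (_buffer_pool[i].in_use || _buffer_pool[i].capacity < size)
            continue;
        if (best < 0 || _buffer_pool[i].capacity < _buffer_pool[best].capacity)
            best = i;
    }
    if (best >= 0) {
        _buffer_pool[best].in_use = true;
        return _buffer_pool[best].buffer;
    }
    /* No suitable pooled buffer; fall back to a fresh allocation. */
    return AcquireTransferBuffer(size);
}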

View File

@ -1,16 +0,0 @@
#ifndef RT_VK_TRANSFERS_H
#define RT_VK_TRANSFERS_H
#include "gpu.h"
#include "runtime/runtime.h"
enum {
RT_NO_TRANSFER_SLOTS = RT_CUSTOM_ERROR_START,
};
rt_result rtUploadToBuffer(VkBuffer buffer, VmaAllocation allocation, rt_gpu_queue current_owner, const void *data, size_t nbytes);
/* Wait until transfers to gpu resources are finished. */
void rtFlushGPUTransfers(void);
#endif
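
A short usage sketch for this interface. UploadAndWait is an illustrative name, and treating slot exhaustion by flushing and retrying once is an assumption about how the API is meant to be used, not documented behaviour.

/* Sketch: queue a staging copy and block until it has completed. */
static rt_result UploadAndWait(VkBuffer dst, VmaAllocation alloc, const void *data, size_t n) {
    rt_result res = rtUploadToBuffer(dst, alloc, RT_VK_UNOWNED, data, n);
    if (res == RT_NO_TRANSFER_SLOTS) {
        /* All transfer slots for this frame are taken: drain them and retry once. */
        rtFlushGPUTransfers();
        res = rtUploadToBuffer(dst, alloc, RT_VK_UNOWNED, data, n);
    }
    if (res == RT_SUCCESS)
        rtFlushGPUTransfers();
    return res;
}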

View File

@ -1,21 +0,0 @@
#ifdef _MSC_VER
#pragma warning(push, 0)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#pragma GCC diagnostic ignored "-Wmissing-braces"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
#include <volk/volk.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#define VMA_IMPLEMENTATION
#include <vma/vk_mem_alloc.h>
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif