rtengine/src/renderer/dx11/render_graph.cpp
2024-05-19 12:48:50 +02:00

230 lines
8.9 KiB
C++

#include "gfx/render_view.h"
#include "gfx/renderer_api.h"
#include "renderer/common/common_render_graph.h"
#include "device_objects.hpp"
#include "gpu.hpp"
// Upper bound on the number of render views that can be submitted to a single
// pass for one frame slot; SubmitRenderView rejects submissions beyond this.
static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;
// Per-pass runtime storage of this backend. The render graph stores it behind
// rt_render_pass::runtime_data; its size is reported via GetRuntimeDataSize().
// Every array is indexed by the frame slot (frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT).
struct rt_pass_runtime_data {
// Views submitted to the pass, one row per frame slot.
rt_render_view views[RT_DX11_MAX_FRAMES_IN_FLIGHT][MAX_SUBMITTED_VIEWS_PER_PASS];
// Number of valid entries in the matching row of `views`.
uint32_t view_count[RT_DX11_MAX_FRAMES_IN_FLIGHT];
// Frame id of the last submission into the slot; ResetRenderGraph sets it back to 0.
unsigned int views_frame_id[RT_DX11_MAX_FRAMES_IN_FLIGHT];
};
// Platform callback handed to the render graph builder (see
// CreateRenderGraphBuilder). Forwards the format, width, height and name of
// the physical render target description to rtCreateRenderTarget and returns
// the handle it produces.
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
return rtCreateRenderTarget({.format = rtinfo->format,
.width = rtinfo->width,
.height = rtinfo->height,
.name = rtinfo->name});
}
// Platform callback for the render graph builder. This backend always
// answers 0.
// NOTE(review): presumably 0 means "the builder does not need to emit explicit
// synchronization for this backend" - confirm against the common render graph.
static int RequireExplicitSynchronization() {
    const int explicit_sync_required = 0;
    return explicit_sync_required;
}
// Platform callback for the render graph builder: reports how many bytes of
// backend-specific runtime data this backend uses per pass.
static size_t GetRuntimeDataSize() {
    const size_t bytes_per_pass = sizeof(rt_pass_runtime_data);
    return bytes_per_pass;
}
// Renderer API entry point: creates a render graph builder that is wired up
// with the DX11 platform callbacks defined in this file.
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
    // Value-initialize first so that any callback not set below stays empty.
    rt_render_graph_builder_platform_callbacks platform_callbacks{};

    platform_callbacks.GetRuntimeDataSize             = GetRuntimeDataSize;
    platform_callbacks.RequireExplicitSynchronization = RequireExplicitSynchronization;
    platform_callbacks.CreateRenderTarget             = CreateRenderTarget;

    return rtCreateRenderGraphBuilder(&platform_callbacks);
}
// Renderer API entry point: destroys a render graph builder by forwarding it
// unchanged to rtDestroyRenderGraphBuilder.
extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
rtDestroyRenderGraphBuilder(builder);
}
// Renderer API entry point: attaches `view` to every pass of `render_graph`
// whose id equals `pass_id`, for the frame slot derived from `frame_id`.
//
// The view is appended to the pass' per-slot view list. If a matching pass
// already holds MAX_SUBMITTED_VIEWS_PER_PASS views for that slot, the
// verification fails and the function returns without storing the view.
// Nothing happens if no pass has the requested id.
extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                                     uint32_t pass_id,
                                                     rt_render_view view,
                                                     unsigned int frame_id) {
    for (uint32_t pass_index = 0; pass_index < render_graph->pass_count; ++pass_index) {
        rt_render_pass &candidate = render_graph->passes[pass_index];
        if (candidate.id != pass_id)
            continue;

        auto *runtime_data = reinterpret_cast<rt_pass_runtime_data *>(candidate.runtime_data);
        unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;

        // A slot may only receive views for one frame at a time; 0 marks a
        // slot that has been reset and not been written to since.
        RT_ASSERT(runtime_data->views_frame_id[frame_slot] == frame_id ||
                      runtime_data->views_frame_id[frame_slot] == 0,
                  "Tried to submit a view for a not-current frame.");

        if (!RT_VERIFY(runtime_data->view_count[frame_slot] < MAX_SUBMITTED_VIEWS_PER_PASS))
            return;

        const uint32_t write_index = runtime_data->view_count[frame_slot];
        runtime_data->views[frame_slot][write_index] = view;
        runtime_data->view_count[frame_slot]         = write_index + 1;
        runtime_data->views_frame_id[frame_slot]     = frame_id;
    }
}
// Renderer API entry point: clears the submitted views of every pass in
// `graph` for the frame slot that `frame_id` maps to, so the slot can be
// reused. Debug builds additionally zero the stored views themselves.
extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph,
                                                     unsigned int frame_id) {
    const unsigned int slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;

    uint32_t pass_index = 0;
    while (pass_index < graph->pass_count) {
        auto &data =
            *reinterpret_cast<rt_pass_runtime_data *>(graph->passes[pass_index].runtime_data);
#ifdef RT_DEBUG
        // Wipe the stale views of this slot; only the count matters in release builds.
        memset(data.views[slot], 0, sizeof(data.views[slot]));
#endif
        data.views_frame_id[slot] = 0;
        data.view_count[slot]     = 0;
        ++pass_index;
    }
}
// Records the GPU work of one pass into `cmd`: binds (and optionally clears)
// the color and depth/stencil outputs, sets the viewport and finally invokes
// the pass' Execute callback with the views submitted for this frame slot.
//
// Returns RT_INVALID_VALUE if the pass is not executable (too many color
// outputs, missing runtime data or callback, unknown color render target),
// RT_UNKNOWN_ERROR if the swap chain description is needed but cannot be
// queried, and otherwise whatever the Execute callback returns.
static rt_result RecordPassCommands(rt_render_pass *pass,
                                    rt_command_buffer *cmd,
                                    rt_command_buffer_handle cmdbuf_handle,
                                    unsigned int frame_id) {
    ID3D11RenderTargetView *rtvs[4] = {};
    constexpr size_t max_color_outputs = sizeof(rtvs) / sizeof(rtvs[0]);

    // Validate before touching the GPU state: writing more than
    // max_color_outputs entries would overflow `rtvs`, and the runtime data
    // and the callback are dereferenced unconditionally further down.
    if (!RT_VERIFY(static_cast<size_t>(pass->color_output_count) <= max_color_outputs))
        return RT_INVALID_VALUE;
    auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
    if (!RT_VERIFY(runtime_data))
        return RT_INVALID_VALUE;
    if (!RT_VERIFY(pass->Execute))
        return RT_INVALID_VALUE;

    // Setup rtvs
    for (uint32_t i = 0; i < pass->color_output_count; ++i) {
        rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
        if (!RT_VERIFY(rt))
            return RT_INVALID_VALUE;
        RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
        rtvs[i] = rt->rtv;
        if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
            FLOAT color[4] = {
                pass->color_clear_values[i].r,
                pass->color_clear_values[i].g,
                pass->color_clear_values[i].b,
                pass->color_clear_values[i].a,
            };
            cmd->context->ClearRenderTargetView(rt->rtv, color);
        }
    }

    // The depth/stencil output is optional: without one, a null DSV is bound.
    ID3D11DepthStencilView *dsv = nullptr;
    rt_render_target *dsvrt     = rtGetRenderTarget(pass->depth_stencil);
    if (dsvrt) {
        RT_ASSERT(dsvrt->IsDepthStencilTarget(),
                  "Need to provide a valid depth stencil render target");
        dsv = dsvrt->dsv;
        if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
            cmd->context->ClearDepthStencilView(
                dsv,
                (dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
                                               : D3D11_CLEAR_DEPTH,
                pass->depth_stencil_clear_value.depth,
                static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
    }
    cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);

    D3D11_VIEWPORT viewport;
    viewport.TopLeftX = pass->render_area.offset.x;
    viewport.TopLeftY = pass->render_area.offset.y;
    viewport.Width    = pass->render_area.size.x;
    viewport.Height   = pass->render_area.size.y;
    viewport.MinDepth = pass->min_depth;
    viewport.MaxDepth = pass->max_depth;
    // A zero extent is replaced by the corresponding swap chain buffer extent.
    if (viewport.Width == 0 || viewport.Height == 0) {
        DXGI_SWAP_CHAIN_DESC desc = {};
        if (FAILED(g_gpu.swap_chain.swap_chain->GetDesc(&desc)))
            return RT_UNKNOWN_ERROR;
        if (viewport.Width == 0)
            viewport.Width = static_cast<float>(desc.BufferDesc.Width);
        if (viewport.Height == 0)
            viewport.Height = static_cast<float>(desc.BufferDesc.Height);
    }
    cmd->context->RSSetViewports(1, &viewport);

    const unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
    return pass->Execute(pass->id,
                         cmdbuf_handle,
                         runtime_data->views[frame_slot],
                         runtime_data->view_count[frame_slot],
                         pass->user_data);
}

// Executes a single render pass on the command buffer behind `cmdbuf_handle`.
//
// If the command buffer has an annotation interface, the recorded commands are
// wrapped in a debug event named after the pass. The event is closed on every
// exit path, and only if it was actually opened.
//
// Returns RT_INVALID_VALUE if the command buffer handle cannot be resolved,
// otherwise the result of recording the pass (see RecordPassCommands).
static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle, unsigned int frame_id) {
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
    if (!RT_VERIFY(cmd))
        return RT_INVALID_VALUE;

    bool event_open = false;
    if (cmd->annotation) {
        WCHAR wname[128];
        // NOTE(review): sizeof(wname) is a size in bytes, not in characters -
        // confirm that this is the unit rtUTF8ToWStr expects.
        if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS) {
            cmd->annotation->BeginEvent(wname);
            event_open = true;
        }
    }

    const rt_result res = RecordPassCommands(pass, cmd, cmdbuf_handle, frame_id);

    if (event_open)
        cmd->annotation->EndEvent();
    return res;
}
static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
DXGI_SWAP_CHAIN_DESC scd;
g_gpu.swap_chain.swap_chain->GetDesc(&scd);
D3D11_TEXTURE2D_DESC td;
backbuffer->texture->GetDesc(&td);
// This is more strict than necessary, because the formats could also be from the same group
return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
}
// Renderer API entry point: executes all passes of `render_graph` for the
// frame `frame_id` and copies the graph's backbuffer render target into the
// swap chain buffer.
//
// One command buffer is allocated per pass. The passes are executed in array
// order; execution stops at the first failing pass and the command buffers are
// only submitted if every pass succeeded. The copy to the swap chain is
// attempted in either case.
//
// Returns RT_OUT_OF_MEMORY if temporary memory is unavailable, the error of
// rtAllocCommandBuffers if that fails, RT_INVALID_VALUE if the backbuffer
// render target cannot be resolved, RT_UNKNOWN_ERROR if the swap chain buffer
// cannot be retrieved, and otherwise the result of pass execution/submission.
extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph, unsigned int frame_id) {
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    // Alloc a command buffer for every pass
    rt_command_buffer_handle *cmdbufs =
        RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
    if (!cmdbufs && render_graph->pass_count > 0) {
        rtReturnTemporaryArena(temp);
        return RT_OUT_OF_MEMORY;
    }
    rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
    if (res != RT_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return res;
    }

    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
        rt_render_pass *pass = &render_graph->passes[i];
        res                  = ExecutePass(pass, cmdbufs[i], frame_id);
        if (res != RT_SUCCESS)
            break;
    }
    // NOTE(review): if a pass failed, the allocated command buffers are never
    // submitted - confirm that they do not need to be handed back explicitly.
    if (res == RT_SUCCESS) {
        res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
    }

    // Copy backbuffer to swapchain
    // NOTE(review): backbuffer_index is used without a range check - confirm
    // that the builder always produces a valid index.
    rt_render_target *backbuffer =
        rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
    if (!backbuffer) {
        rtReturnTemporaryArena(temp);
        return RT_INVALID_VALUE;
    }

    ID3D11Texture2D *frame_buffer = nullptr;
    if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
        rtReportError("dx11", "Failed to retrieve the backbuffer.");
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    const bool can_copy = IsCopyResourcePossible(backbuffer);
    if (can_copy)
        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);

    // GetBuffer handed out a reference that we own; without this Release the
    // swap chain buffer would gain one reference every frame.
    frame_buffer->Release();
    // Cleanup happens before RT_NOT_IMPLEMENTED so that it does not depend on
    // how that macro leaves this function.
    rtReturnTemporaryArena(temp);

    if (!can_copy) {
        // NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
        // that implements a blit.
        // Another idea would be a compute shader that does a copy&filter but that requires more
        // work
        RT_NOT_IMPLEMENTED;
    }
    return res;
}