#include "gfx/render_view.h"
#include "gfx/renderer_api.h"
#include "renderer/common/common_render_graph.h"

#include "device_objects.hpp"
#include "gpu.hpp"

// Upper bound on the number of views that can be queued for one pass in one frame slot.
static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;

// Capacity of the render target view array that ExecutePass hands to OMSetRenderTargets.
static constexpr unsigned int DX11_MAX_BOUND_COLOR_TARGETS = 4;

// Per-pass storage for submitted views. Every array is indexed by
// (frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT).
struct rt_pass_runtime_data
{
    // Views queued for the pass, per frame slot.
    rt_render_view views[RT_DX11_MAX_FRAMES_IN_FLIGHT][MAX_SUBMITTED_VIEWS_PER_PASS];
    // Number of valid entries in views[slot].
    uint32_t view_count[RT_DX11_MAX_FRAMES_IN_FLIGHT];
    // Frame id that last wrote to the slot; 0 after a reset.
    unsigned int views_frame_id[RT_DX11_MAX_FRAMES_IN_FLIGHT];
};

// Render graph builder callback: forwards format, size and name of the physical render
// target description to rtCreateRenderTarget.
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo)
{
    return rtCreateRenderTarget({.format = rtinfo->format,
                                 .width  = rtinfo->width,
                                 .height = rtinfo->height,
                                 .name   = rtinfo->name});
}

// Render graph builder callback: this backend always reports 0.
static int RequireExplicitSynchronization()
{
    return 0;
}

// Render graph builder callback: size of the per-pass runtime data block used by this file.
static size_t GetRuntimeDataSize()
{
    return sizeof(rt_pass_runtime_data);
}

// Creates a render graph builder that is wired up with the callbacks defined above.
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void)
{
    rt_render_graph_builder_platform_callbacks cbs{};
    cbs.CreateRenderTarget             = CreateRenderTarget;
    cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
    cbs.GetRuntimeDataSize             = GetRuntimeDataSize;
    return rtCreateRenderGraphBuilder(&cbs);
}

// Destroys a builder by forwarding to rtDestroyRenderGraphBuilder.
extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder)
{
    rtDestroyRenderGraphBuilder(builder);
}

// Queues `view` for every pass of `render_graph` whose id equals `pass_id`, in the frame
// slot selected by `frame_id`.
//
// Returns without storing anything (and without looking at further passes) as soon as a
// matching pass already holds MAX_SUBMITTED_VIEWS_PER_PASS views for that slot.
extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                                     uint32_t pass_id,
                                                     rt_render_view view,
                                                     unsigned int frame_id)
{
    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
        if (render_graph->passes[i].id != pass_id)
            continue;

        rt_render_pass *pass = &render_graph->passes[i];
        rt_pass_runtime_data *runtime_data =
            reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
        unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;

        // A slot must either be untouched since the last reset (0) or belong to this frame.
        RT_ASSERT(runtime_data->views_frame_id[frame_slot] == frame_id ||
                      runtime_data->views_frame_id[frame_slot] == 0,
                  "Tried to submit a view for a not-current frame.");

        if (!RT_VERIFY(runtime_data->view_count[frame_slot] < MAX_SUBMITTED_VIEWS_PER_PASS))
            return;

        runtime_data->views[frame_slot][runtime_data->view_count[frame_slot]++] = view;
        runtime_data->views_frame_id[frame_slot]                                = frame_id;
    }
}

// Clears the submitted views of every pass for the frame slot selected by `frame_id`.
extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph,
                                                     unsigned int frame_id)
{
    unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
    for (uint32_t i = 0; i < graph->pass_count; ++i) {
        rt_pass_runtime_data *runtime_data =
            reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
#ifdef RT_DEBUG
        // Debug builds also wipe the stale view entries, not just the counter.
        memset(runtime_data->views[frame_slot], 0, sizeof(runtime_data->views[frame_slot]));
#endif
        runtime_data->view_count[frame_slot]     = 0;
        runtime_data->views_frame_id[frame_slot] = 0;
    }
}

// Records the work of one pass into `cmd`: binds and optionally clears the outputs, sets the
// viewport and invokes the pass' Execute callback with the views submitted for `frame_id`.
//
// Returns RT_INVALID_VALUE if the pass description cannot be executed, otherwise whatever
// the Execute callback returns.
static rt_result RecordPassCommands(rt_render_pass *pass,
                                    rt_command_buffer *cmd,
                                    rt_command_buffer_handle cmdbuf_handle,
                                    unsigned int frame_id)
{
    // Validate everything that is dereferenced or called below before recording anything.
    if (!RT_VERIFY(pass->color_output_count <= DX11_MAX_BOUND_COLOR_TARGETS))
        return RT_INVALID_VALUE; // would write past the end of rtvs
    auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
    if (!RT_VERIFY(runtime_data))
        return RT_INVALID_VALUE;
    if (!RT_VERIFY(pass->Execute))
        return RT_INVALID_VALUE;

    // Setup rtvs
    ID3D11RenderTargetView *rtvs[DX11_MAX_BOUND_COLOR_TARGETS] = {};
    ID3D11DepthStencilView *dsv                                = nullptr;
    for (uint32_t i = 0; i < pass->color_output_count; ++i) {
        rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
        if (!RT_VERIFY(rt))
            return RT_INVALID_VALUE;
        RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
        rtvs[i] = rt->rtv;

        if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
            FLOAT color[4] = {
                pass->color_clear_values[i].r,
                pass->color_clear_values[i].g,
                pass->color_clear_values[i].b,
                pass->color_clear_values[i].a,
            };
            cmd->context->ClearRenderTargetView(rt->rtv, color);
        }
    }

    // The depth stencil target is optional; without one, dsv stays null.
    rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
    if (dsvrt) {
        RT_ASSERT(dsvrt->IsDepthStencilTarget(),
                  "Need to provide a valid depth stencil render target");
        dsv = dsvrt->dsv;
        if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
            cmd->context->ClearDepthStencilView(
                dsv,
                (dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
                                               : D3D11_CLEAR_DEPTH,
                pass->depth_stencil_clear_value.depth,
                static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
    }

    cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);

    D3D11_VIEWPORT viewport;
    viewport.TopLeftX = pass->render_area.offset.x;
    viewport.TopLeftY = pass->render_area.offset.y;
    viewport.Width    = pass->render_area.size.x;
    viewport.Height   = pass->render_area.size.y;
    viewport.MinDepth = pass->min_depth;
    viewport.MaxDepth = pass->max_depth;
    if (viewport.Width == 0 || viewport.Height == 0) {
        // A zero extent falls back to the corresponding swap chain buffer dimension.
        // Only trust the description if the query succeeded.
        DXGI_SWAP_CHAIN_DESC desc = {};
        if (SUCCEEDED(g_gpu.swap_chain.swap_chain->GetDesc(&desc))) {
            if (viewport.Width == 0)
                viewport.Width = static_cast<float>(desc.BufferDesc.Width);
            if (viewport.Height == 0)
                viewport.Height = static_cast<float>(desc.BufferDesc.Height);
        }
    }
    cmd->context->RSSetViewports(1, &viewport);

    unsigned int frame_slot = frame_id % RT_DX11_MAX_FRAMES_IN_FLIGHT;
    return pass->Execute(pass->id,
                         cmdbuf_handle,
                         runtime_data->views[frame_slot],
                         runtime_data->view_count[frame_slot],
                         pass->user_data);
}

// Executes a single pass on the command buffer identified by `cmdbuf_handle`.
//
// If the command buffer has an annotation interface, the recorded commands are wrapped in
// an event named after the pass. The event is closed on every path on which it was opened,
// including failures.
//
// Returns RT_INVALID_VALUE if the command buffer handle cannot be resolved, otherwise the
// result of recording the pass.
static rt_result
ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle, unsigned int frame_id)
{
    rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
    if (!RT_VERIFY(cmd))
        return RT_INVALID_VALUE;

    bool event_open = false;
    if (cmd->annotation) {
        WCHAR wname[128];
        // NOTE(review): sizeof(wname) is a byte count; confirm rtUTF8ToWStr does not expect
        // the capacity in WCHAR elements.
        if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS) {
            cmd->annotation->BeginEvent(wname);
            event_open = true;
        }
    }

    rt_result res = RecordPassCommands(pass, cmd, cmdbuf_handle, frame_id);

    if (event_open)
        cmd->annotation->EndEvent();
    return res;
}

// Returns true if `backbuffer` matches the swap chain description in width, height, sample
// count and format.
static bool IsCopyResourcePossible(const rt_render_target *backbuffer)
{
    DXGI_SWAP_CHAIN_DESC scd;
    g_gpu.swap_chain.swap_chain->GetDesc(&scd);

    D3D11_TEXTURE2D_DESC td;
    backbuffer->texture->GetDesc(&td);

    // This is more strict than necessary, because the formats could also be from the same group
    return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
           scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
}

// Executes all passes of `render_graph` for `frame_id`, submits their command buffers and
// copies the graph's backbuffer render target into buffer 0 of the swap chain.
//
// Returns:
//   RT_OUT_OF_MEMORY  - no temporary arena or no space for the command buffer handles
//   RT_INVALID_VALUE  - the backbuffer render target could not be resolved
//   RT_UNKNOWN_ERROR  - the swap chain buffer could not be retrieved
//   otherwise the result of allocating, recording and submitting the command buffers.
extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph,
                                                            unsigned int frame_id)
{
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    // Alloc a command buffer for every pass
    rt_command_buffer_handle *cmdbufs =
        RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
    if (!cmdbufs && render_graph->pass_count > 0) {
        rtReturnTemporaryArena(temp);
        return RT_OUT_OF_MEMORY;
    }

    rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
    if (res != RT_SUCCESS) {
        rtReturnTemporaryArena(temp);
        return res;
    }

    // Stop at the first failing pass; its result is what gets returned at the end.
    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
        rt_render_pass *pass = &render_graph->passes[i];
        res                  = ExecutePass(pass, cmdbufs[i], frame_id);
        if (res != RT_SUCCESS)
            break;
    }

    if (res == RT_SUCCESS) {
        res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
    }

    // Copy backbuffer to swapchain
    rt_render_target *backbuffer =
        rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
    if (!backbuffer) {
        rtReturnTemporaryArena(temp);
        return RT_INVALID_VALUE;
    }

    ID3D11Texture2D *frame_buffer = nullptr;
    if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
        rtReportError("dx11", "Failed to retrieve the backbuffer.");
        rtReturnTemporaryArena(temp);
        return RT_UNKNOWN_ERROR;
    }

    if (IsCopyResourcePossible(backbuffer)) {
        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
    } else {
        // NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
        // that implements a blit.
        // Another idea would be a compute shader that does a copy&filter but that requires more
        // work
        RT_NOT_IMPLEMENTED;
    }

    // GetBuffer hands out an owning reference; drop it so it does not accumulate every frame.
    frame_buffer->Release();

    rtReturnTemporaryArena(temp);
    return res;
}