#include #include #include #define VY_VK_DONT_DEFINE_GPU_GLOBAL #include "gpu.h" #include "runtime/config.h" #include "runtime/renderer_api.h" #include "runtime/runtime.h" VY_CVAR_I( r_VkEnableAPIAllocTracking, "Enable tracking of allocations done by the vulkan api. [0/1] Default: 0", 0); VY_CVAR_S(r_VkPhysDeviceName, "Name of the selected physical device. Default: \"\"", ""); vy_vk_gpu g_gpu; static VkAllocationCallbacks _tracking_alloc_cbs; static const char *AllocationScopeToString(VkSystemAllocationScope scope) { switch (scope) { case VK_SYSTEM_ALLOCATION_SCOPE_COMMAND: return "COMMAND"; case VK_SYSTEM_ALLOCATION_SCOPE_OBJECT: return "OBJECT"; case VK_SYSTEM_ALLOCATION_SCOPE_CACHE: return "CACHE"; case VK_SYSTEM_ALLOCATION_SCOPE_DEVICE: return "DEVICE"; case VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE: return "INSTANCE"; default: return "UNKNOWN"; } } static void *TrackAllocation(void *userData, size_t size, size_t alignment, VkSystemAllocationScope scope) { vyLog("vk", "Allocation. Size: %zu, Alignment: %zu, Scope: %s", size, alignment, AllocationScopeToString(scope)); #ifdef _WIN32 return _aligned_malloc(size, alignment); #else return aligned_alloc(alignment, size); #endif } static void *TrackReallocation(void *userData, void *original, size_t size, size_t alignment, VkSystemAllocationScope scope) { vyLog("vk", "Reallocation. Size: %zu, Alignment: %zu, Scope: %s", size, alignment, AllocationScopeToString(scope)); return realloc(original, size); } static void TrackFree(void *userData, void *memory) { free(memory); } static VkBool32 VKAPI_PTR DebugUtilsMessengerCb(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT types, const VkDebugUtilsMessengerCallbackDataEXT *callbackData, void *userData) { return VK_FALSE; } VY_DLLEXPORT void vyRegisterCVars(void) { vyRegisterCVAR(&r_VkEnableAPIAllocTracking); vyRegisterCVAR(&r_VkPhysDeviceName); } static int CreateInstance(void) { VkResult result = volkInitialize(); if (result != VK_SUCCESS) { vyReportError("vk", "Initialization failed: volkInitialize()"); return 0; } VkApplicationInfo app_info = { .apiVersion = VK_API_VERSION_1_2, .applicationVersion = 0x00001000, .engineVersion = 0x00001000, .pEngineName = "voyageEngine", .pApplicationName = "Voyage", }; const char *extensions[] = { VK_KHR_SURFACE_EXTENSION_NAME, #ifdef _WIN32 "VK_KHR_win32_surface", #elif defined(VY_USE_XLIB) "VK_KHR_xlib_surface", #endif #ifdef VY_DEBUG VK_EXT_DEBUG_UTILS_EXTENSION_NAME, #endif }; const char *layers[1]; unsigned int layer_count = 0; #ifdef VY_DEBUG /* Search for layers we want to enable */ uint32_t available_layer_count = 0; result = vkEnumerateInstanceLayerProperties(&available_layer_count, NULL); if (result == VK_SUCCESS) { VkLayerProperties *props = calloc(available_layer_count, sizeof(VkLayerProperties)); if (props) { vkEnumerateInstanceLayerProperties(&available_layer_count, props); for (uint32_t i = 0; i < available_layer_count; ++i) { if (strcmp(props[i].layerName, "VK_LAYER_KHRONOS_validation") == 0) { layers[0] = "VK_LAYER_KHRONOS_validation"; layer_count = 1; break; } } free(props); } else { vyLog("vk", "Failed to allocate storage for instance layer properties."); } } else { vyLog("vk", "vkEnumerateInstanceLayerProperties failed."); } #endif VkInstanceCreateInfo instance_info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &app_info, .ppEnabledExtensionNames = extensions, .enabledExtensionCount = VY_ARRAY_COUNT(extensions), .ppEnabledLayerNames = layers, .enabledLayerCount = layer_count, }; result = vkCreateInstance(&instance_info, g_gpu.alloc_cb, &g_gpu.instance); if (result != VK_SUCCESS) { vyReportError("vk", "Failed to create the vulkan instance."); return -1; } volkLoadInstance(g_gpu.instance); #ifdef VY_DEBUG /* Create the debug utils messenger */ VkDebugUtilsMessengerCreateInfoEXT messenger_info = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, .pfnUserCallback = DebugUtilsMessengerCb, }; vkCreateDebugUtilsMessengerEXT(g_gpu.instance, &messenger_info, g_gpu.alloc_cb, &g_gpu.messenger); #endif return 0; } static int CreateSurface(const vy_renderer_init_info *info) { #ifdef _WIN32 VkWin32SurfaceCreateInfoKHR surface_info = { .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, .hinstance = info->hInstance, .hwnd = info->hWnd, }; if (vkCreateWin32SurfaceKHR(g_gpu.instance, &surface_info, g_gpu.alloc_cb, &g_gpu.surface) == VK_SUCCESS) return 0; else return -100; #elif defined(VY_USE_XLIB_KHR) VkXlibSurfaceCreateInfoKHR surface_info = { .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, .dpy = info->display, .window = info->window, }; if (vkCreateXlibSurfaceKHR(g_gpu.instance, &surface_info, &g_gpu.alloc_cb, &g_gpu.surface) == VK_SUCCESS) return 0; else return -100; #endif } typedef struct { uint32_t graphics; uint32_t compute; uint32_t present; } vy_queue_indices; static vy_queue_indices RetrieveQueueIndices(VkPhysicalDevice phys_dev, VkSurfaceKHR surface) { vy_queue_indices indices = {.graphics = UINT32_MAX, .compute = UINT32_MAX, .present = UINT32_MAX}; uint32_t count = 0; vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, NULL); VkQueueFamilyProperties *props = calloc(count, sizeof(VkQueueFamilyProperties)); if (!props) { return indices; } vkGetPhysicalDeviceQueueFamilyProperties(phys_dev, &count, props); for (uint32_t i = 0; i < count; ++i) { if (props[i].queueCount == 0) continue; if ((props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) indices.graphics = i; if ((props[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) indices.compute = i; VkBool32 present_supported = VK_FALSE; vkGetPhysicalDeviceSurfaceSupportKHR(phys_dev, i, surface, &present_supported); if (present_supported) indices.present = i; } free(props); return indices; } static bool CheckDeviceExtensionSupported(VkPhysicalDevice phys_dev) { const char *required_extensions[] = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, }; uint32_t extension_count; vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, NULL); VkExtensionProperties *supported_extensions = calloc(extension_count, sizeof(VkExtensionProperties)); if (!supported_extensions) return false; vkEnumerateDeviceExtensionProperties(phys_dev, NULL, &extension_count, supported_extensions); bool supported = true; for (uint32_t i = 0; i < VY_ARRAY_COUNT(required_extensions); ++i) { bool found = false; for (uint32_t j = 0; j < extension_count; ++j) { if (strncmp(supported_extensions[j].extensionName, required_extensions[i], VK_MAX_EXTENSION_NAME_SIZE) == 0) { found = true; break; } } if (!found) { supported = false; goto out; } } out: free(supported_extensions); return supported; } static int ChoosePhysicalDevice(void) { g_gpu.phys_device = VK_NULL_HANDLE; uint32_t phys_device_count = 0; VkResult result = vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, NULL); if (result != VK_SUCCESS) { vyReportError("vk", "Failed to enumerate the physical devices."); return -2; } VkPhysicalDevice *phys_devices = calloc(phys_device_count, sizeof(VkPhysicalDevice)); if (!phys_devices) { vyReportError( "vk", "Failed to enumerate the physical devices: Out of memory."); return -2; } vkEnumeratePhysicalDevices(g_gpu.instance, &phys_device_count, phys_devices); uint32_t highscore = 0; uint32_t best_index = phys_device_count; for (uint32_t i = 0; i < phys_device_count; ++i) { VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, .pNext = NULL, }; VkPhysicalDeviceProperties2 props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &descriptor_indexing_props, }; vkGetPhysicalDeviceProperties2(phys_devices[i], &props); if (!CheckDeviceExtensionSupported(phys_devices[i])) continue; vy_queue_indices indices = RetrieveQueueIndices(phys_devices[i], g_gpu.surface); if (indices.compute == UINT32_MAX || indices.present == UINT32_MAX || indices.graphics == UINT32_MAX) continue; uint32_t score = 0; if (props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) score += 100; score += (props.properties.limits.maxFramebufferWidth / 100) * (props.properties.limits.maxFramebufferHeight / 100); score += (descriptor_indexing_props .shaderStorageBufferArrayNonUniformIndexingNative) ? 100 : 0; score += (descriptor_indexing_props .shaderSampledImageArrayNonUniformIndexingNative) ? 100 : 0; if (score > highscore) { highscore = score; best_index = i; } if (strncmp(props.properties.deviceName, r_VkPhysDeviceName.s, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE) == 0) { best_index = i; break; } } if (best_index < phys_device_count) { g_gpu.phys_device = phys_devices[0]; VkPhysicalDeviceDescriptorIndexingProperties descriptor_indexing_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, .pNext = NULL, }; VkPhysicalDeviceProperties2 props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &descriptor_indexing_props, }; vkGetPhysicalDeviceProperties2(phys_devices[0], &props); g_gpu.phys_device_props = props.properties; g_gpu.descriptor_indexing_props = descriptor_indexing_props; } free(phys_devices); if (g_gpu.phys_device == VK_NULL_HANDLE) { vyReportError("vk", "Failed to find a suitable physical device."); return -3; } return 0; } static int CreateDevice(void) { const char *extensions[] = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, }; vy_queue_indices queue_indices = RetrieveQueueIndices(g_gpu.phys_device, g_gpu.surface); float priority = 1.f; uint32_t distinct_queue_count = 1; VkDeviceQueueCreateInfo queue_info[3]; queue_info[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info[0].pNext = NULL; queue_info[0].flags = 0; queue_info[0].queueCount = 1; queue_info[0].queueFamilyIndex = queue_indices.graphics; queue_info[0].pQueuePriorities = &priority; if (queue_indices.compute != queue_indices.graphics) { queue_info[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info[1].pNext = NULL; queue_info[1].flags = 0; queue_info[1].queueCount = 1; queue_info[1].queueFamilyIndex = queue_indices.compute; queue_info[1].pQueuePriorities = &priority; ++distinct_queue_count; } if (queue_indices.present != queue_indices.graphics && queue_indices.present != queue_indices.compute) { queue_info[distinct_queue_count].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info[distinct_queue_count].pNext = NULL; queue_info[distinct_queue_count].flags = 0; queue_info[distinct_queue_count].queueCount = 1; queue_info[distinct_queue_count].queueFamilyIndex = queue_indices.present; queue_info[distinct_queue_count].pQueuePriorities = &priority; } VkDeviceCreateInfo device_info = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .enabledExtensionCount = VY_ARRAY_COUNT(extensions), .ppEnabledExtensionNames = extensions, .pQueueCreateInfos = queue_info, .queueCreateInfoCount = distinct_queue_count, }; if (vkCreateDevice(g_gpu.phys_device, &device_info, g_gpu.alloc_cb, &g_gpu.device) != VK_SUCCESS) { vyReportError("vk", "Device creation failed."); return -10; } vkGetDeviceQueue(g_gpu.device, queue_indices.graphics, 0, &g_gpu.graphics_queue); vkGetDeviceQueue(g_gpu.device, queue_indices.compute, 0, &g_gpu.compute_queue); vkGetDeviceQueue(g_gpu.device, queue_indices.present, 0, &g_gpu.present_queue); return 0; } VY_DLLEXPORT int vyInit(const vy_renderer_init_info *info) { vyLog("vk", "Init"); _tracking_alloc_cbs.pUserData = NULL; _tracking_alloc_cbs.pfnAllocation = TrackAllocation; _tracking_alloc_cbs.pfnReallocation = TrackReallocation; _tracking_alloc_cbs.pfnFree = TrackFree; if (r_VkEnableAPIAllocTracking.i) { g_gpu.alloc_cb = &_tracking_alloc_cbs; } else { g_gpu.alloc_cb = NULL; } int res = CreateInstance(); if (res != 0) return res; res = CreateSurface(info); if (res != 0) return res; res = ChoosePhysicalDevice(); if (res != 0) return res; res = CreateDevice(); if (res != 0) return res; return 0; } VY_DLLEXPORT void vyShutdown(void) { vyLog("vk", "Shutdown"); vkDestroyDevice(g_gpu.device, g_gpu.alloc_cb); vkDestroySurfaceKHR(g_gpu.instance, g_gpu.surface, g_gpu.alloc_cb); vkDestroyInstance(g_gpu.instance, g_gpu.alloc_cb); }