/*
 * Buffer manager: hands out contiguous runs of fixed-size blocks from one
 * large pool. A bitmap with one bit per block (bit set = block in use) is
 * stored directly behind the pool memory.
 */
#include "buffer_manager.h"
#include "config.h"
#include "runtime.h"
#include "threading.h"

/* NOTE(review): the system header names were missing from the reviewed copy of
 * this file; the list below is reconstructed from the identifiers used here -
 * confirm against the original. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Bit-scan helpers.
 * Note that lzcnt32(0) and tzcnt32(0) are undefined (see __builtin_clz /
 * __builtin_ctz and _BitScanForward); callers must never pass 0. */
#ifdef _MSC_VER
#include <intrin.h>
#define lzcnt32(x) __lzcnt((x))
#define popcnt32(x) __popcnt((x))

static __forceinline uint32_t tzcnt32(uint32_t x) {
    unsigned long i;
    _BitScanForward(&i, x);
    return (uint32_t)i;
}

static __forceinline bool IsLZCNTSupported(void) {
#define Type 0x80000001
    int info[4];
    __cpuid(info, Type);
    return (info[2] & (1 << 5)) != 0;
#undef Type
}
#elif defined(__GNUC__)
#define lzcnt32(x) __builtin_clz((x))
#define tzcnt32(x) __builtin_ctz((x))
#define popcnt32(x) __builtin_popcount((x))
#define IsLZCNTSupported() true
#else
#error "buffer manager: no bit-scan intrinsics available for this compiler"
#endif

#define BLOCK_SIZE 4096u
#define BLOCKS_PER_DWORD 32u
/* Returned by FindFreeRun when no sufficiently long free run exists. */
#define NO_FREE_RUN SIZE_MAX

static uint32_t *_bitmap;
static char *_memory;
static rt_mutex *_guard;
static size_t _block_count;

RT_CVAR_I(rt_BufferMemoryBudget, "The amount of memory to allocate for the buffer manager. Default: 1GB", RT_GB(1));

/* Number of whole blocks needed to hold `size` bytes (rounds up without
 * risking overflow for very large sizes). */
static size_t BlocksForSize(size_t size) {
    return size / BLOCK_SIZE + (((size % BLOCK_SIZE) != 0) ? 1u : 0u);
}

/* Finds the first run of `count` consecutive free blocks.
 * Returns the index of the first block of the run, or NO_FREE_RUN.
 * `count` must be > 0. The caller must hold _guard. */
static size_t FindFreeRun(size_t count) {
    size_t run_start = 0;
    size_t run_len   = 0;
    size_t i         = 0;

    while (i < _block_count) {
        size_t bit  = i % BLOCKS_PER_DWORD;
        size_t span = BLOCKS_PER_DWORD - bit; /* blocks from i to the end of this dword */
        /* Bit 0 of `rest` corresponds to block i; the top `bit` bits are zero. */
        uint32_t rest = _bitmap[i / BLOCKS_PER_DWORD] >> bit;
        size_t step;

        if ((rest & 1u) != 0) {
            /* Block i is in use: skip the whole run of used blocks at once. */
            uint32_t inverted = ~rest;
            step = (inverted != 0) ? (size_t)tzcnt32(inverted) : span;
            if (step > span) {
                step = span;
            }
            run_len = 0;
        } else {
            /* Block i is free: take every free block up to the next used one
             * (or the end of the dword / the end of the pool). */
            step = (rest != 0) ? (size_t)tzcnt32(rest) : span;
            if (step > _block_count - i) {
                step = _block_count - i;
            }
            if (run_len == 0) {
                run_start = i;
            }
            run_len += step;
            if (run_len >= count) {
                return run_start;
            }
        }
        i += step;
    }
    return NO_FREE_RUN;
}

/* Marks blocks [first, first + count) as used (`in_use` == true) or free.
 * Only the bits of that range are touched. The caller must hold _guard. */
static void MarkRun(size_t first, size_t count, bool in_use) {
    assert(first <= _block_count && count <= _block_count - first);

    size_t i   = first;
    size_t end = first + count;
    while (i < end) {
        size_t bit = i % BLOCKS_PER_DWORD;
        size_t n   = BLOCKS_PER_DWORD - bit;
        if (n > end - i) {
            n = end - i;
        }
        /* n == 32 implies bit == 0; handled separately to avoid a 32-bit shift. */
        uint32_t mask = (n == BLOCKS_PER_DWORD) ? UINT32_MAX : (((1u << n) - 1u) << bit);
        if (in_use) {
            _bitmap[i / BLOCKS_PER_DWORD] |= mask;
        } else {
            _bitmap[i / BLOCKS_PER_DWORD] &= ~mask;
        }
        i += n;
    }
}

/* Creates the guard mutex and allocates the pool plus its bitmap.
 *
 * The pool size is rt_BufferMemoryBudget rounded down to a whole number of
 * blocks. Returns RT_SUCCESS, RT_OUT_OF_MEMORY if the allocation fails, or
 * RT_UNKNOWN_ERROR for every other failure. Nothing is left allocated on
 * failure. */
extern rt_result InitBufferManager(void) {
    /* NOTE(review): the allocator below only relies on tzcnt32; this check is
     * retained so that startup behavior is unchanged - consider dropping it. */
    if (!IsLZCNTSupported()) {
        rtReportError("BUFFERMGR", "The required lzcnt intrinisc is not supported.");
        return RT_UNKNOWN_ERROR;
    }

    size_t budget      = (size_t)rt_BufferMemoryBudget.i;
    size_t block_count = budget / BLOCK_SIZE;
    if ((budget % BLOCK_SIZE) != 0) {
        rtLog("BUFFERMGR", "The configured buffer memory budget %zu is not dividable by the block size (4KB).", budget);
    }
    if (block_count == 0) {
        rtReportError("BUFFERMGR", "The configured buffer memory budget is smaller than one block (4KB).");
        return RT_UNKNOWN_ERROR;
    }

    size_t dword_count  = (block_count + BLOCKS_PER_DWORD - 1) / BLOCKS_PER_DWORD;
    size_t pool_bytes   = block_count * BLOCK_SIZE;
    size_t bitmap_bytes = dword_count * sizeof(uint32_t);
    if (bitmap_bytes > SIZE_MAX - pool_bytes) {
        return RT_OUT_OF_MEMORY;
    }

    rt_mutex *guard = rtCreateMutex();
    if (!guard) {
        rtReportError("BUFFERMGR", "Failed to create the buffer manager mutex.");
        return RT_UNKNOWN_ERROR;
    }

    char *memory = malloc(pool_bytes + bitmap_bytes);
    if (!memory) {
        rtDestroyMutex(guard);
        return RT_OUT_OF_MEMORY;
    }

    /* The bitmap starts at a multiple of BLOCK_SIZE from the malloc'ed base,
     * which keeps it suitably aligned for uint32_t access. */
    uint32_t *bitmap = (uint32_t *)(memory + pool_bytes);
    memset(bitmap, 0, bitmap_bytes);

    _guard       = guard;
    _memory      = memory;
    _bitmap      = bitmap;
    _block_count = block_count;
    return RT_SUCCESS;
}

/* Destroys the guard mutex and frees the pool. Every pointer previously
 * returned by rtAllocBuffer is invalid afterwards. */
extern void ShutdownBufferManager(void) {
    if (_guard) {
        rtDestroyMutex(_guard);
        _guard = NULL;
    }
    free(_memory);
    _memory      = NULL;
    _bitmap      = NULL;
    _block_count = 0;
}

/* Public API */

/* Reserves enough contiguous blocks to hold `size` bytes.
 *
 * Returns a pointer to the first reserved block, or NULL if no sufficiently
 * long run of free blocks exists. A request for 0 bytes reserves nothing and
 * returns the pool base (NULL if the pool is empty). */
RT_DLLEXPORT void *rtAllocBuffer(size_t size) {
    size_t alloc_blocks = BlocksForSize(size);
    void *result        = NULL;

    rtLockMutex(_guard);
    if (alloc_blocks == 0) {
        result = (_block_count > 0) ? _memory : NULL;
    } else if (alloc_blocks <= _block_count) {
        size_t first = FindFreeRun(alloc_blocks);
        if (first != NO_FREE_RUN) {
            MarkRun(first, alloc_blocks, true);
            result = _memory + first * BLOCK_SIZE;
        }
    }
    rtUnlockMutex(_guard);

    return result;
}

/* Returns the blocks covering [begin, begin + size) to the pool.
 *
 * `begin` is rounded down to the start of its block and `size` is rounded up
 * to whole blocks. A NULL `begin` or a `size` of 0 is a no-op. A range that
 * does not lie inside the pool is rejected with an error report and leaves the
 * bitmap untouched. */
RT_DLLEXPORT void rtReleaseBuffer(const void *begin, size_t size) {
    if (!begin || size == 0) {
        return;
    }

    size_t alloc_blocks = BlocksForSize(size);
    bool valid          = false;

    rtLockMutex(_guard);
    uintptr_t base = (uintptr_t)_memory;
    uintptr_t addr = (uintptr_t)begin;
    if (_memory != NULL && addr >= base) {
        uintptr_t first_block = (addr - base) / BLOCK_SIZE;
        if (first_block < _block_count && alloc_blocks <= _block_count - (size_t)first_block) {
            MarkRun((size_t)first_block, alloc_blocks, false);
            valid = true;
        }
    }
    rtUnlockMutex(_guard);

    if (!valid) {
        rtReportError("BUFFERMGR", "Attempted to release a buffer range that lies outside of the buffer pool.");
    }
}