rtengine/src/runtime/buffer_manager.c
Kevin Trogant b4eef37741 Fix linux build
I did not test if it actually runs, but it builds with warning_level=2.
2024-02-29 16:12:09 +01:00

169 lines
5.3 KiB
C

#include "buffer_manager.h"
#include "config.h"
#include "runtime.h"
#include "threading.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Bit-scan helpers used by the block bitmap.
 *
 *   lzcnt32(x)  - number of leading (most significant) zero bits
 *   tzcnt32(x)  - number of trailing (least significant) zero bits
 *   popcnt32(x) - number of set bits
 *
 * Note that the return values of __builtin_clz(0) and __builtin_ctz(0) are
 * undefined, so callers must not pass 0 to lzcnt32/tzcnt32 in the GCC/Clang
 * configuration. */
#ifdef _MSC_VER
#include <intrin.h>
#define lzcnt32(x) __lzcnt((x))
#define popcnt32(x) __popcnt((x))

/* Index of the lowest set bit of x, or 32 when x has no set bit.
 * _BitScanForward leaves its output untouched for a zero input, so the
 * zero case is handled explicitly instead of returning an indeterminate value. */
static __forceinline uint32_t tzcnt32(uint32_t x) {
    unsigned long index = 32;
    if (!_BitScanForward(&index, x)) {
        return 32;
    }
    return (uint32_t)index;
}

/* Queries CPUID for the LZCNT (ABM) feature flag: extended leaf 0x80000001,
 * ECX bit 5. Without that feature the __lzcnt intrinsic does not produce a
 * leading-zero count. */
static __forceinline bool IsLZCNTSupported(void) {
    const unsigned int highest_leaf_query = 0x80000000u;
    const unsigned int feature_leaf = 0x80000001u;
    int info[4] = {0};

    /* EAX of leaf 0x80000000 holds the highest supported extended leaf. */
    __cpuid(info, (int)highest_leaf_query);
    if ((unsigned int)info[0] < feature_leaf) {
        return false;
    }

    __cpuid(info, (int)feature_leaf);
    return (info[2] & (1 << 5)) != 0;
}
#elif defined(__GNUC__)
#define lzcnt32(x) __builtin_clz((x))
#define tzcnt32(x) __builtin_ctz((x))
#define popcnt32(x) __builtin_popcount((x))
#define IsLZCNTSupported() true
#else
#error "buffer_manager.c: no lzcnt32/tzcnt32/popcnt32 implementation for this compiler"
#endif
/* Allocation granularity in bytes. Every buffer occupies a whole number of
 * blocks of this size. */
#define BLOCK_SIZE 4096u
/* Occupancy bitmap: bit (n % 32) of dword (n / 32) is set while block n is
 * handed out. Stored in the same malloc'd region as the pool, behind it. */
static uint32_t *_bitmap;
/* Base address of the malloc'd region; block n starts at
 * _memory + n * BLOCK_SIZE. */
static char *_memory;
/* Mutex held while rtAllocBuffer / rtReleaseBuffer read or modify the bitmap. */
static rt_mutex *_guard;
/* Number of whole blocks in the pool (configured budget / BLOCK_SIZE). */
static size_t _block_count;
/* Configuration variable holding the pool size in bytes; read through its
 * `.i` member during InitBufferManager. RT_CVAR_I and RT_GB are project
 * macros defined outside this file. */
RT_CVAR_I(rt_BufferMemoryBudget,
"The amount of memory to allocate for the buffer manager. Default: 1GB",
RT_GB(1));
extern rt_result InitBufferManager(void) {
_guard = rtCreateMutex();
if (!_guard) {
rtReportError("BUFFERMGR", "Failed to create the buffer manager mutex.");
return RT_UNKNOWN_ERROR;
}
if (!IsLZCNTSupported()) {
rtReportError("BUFFERMGR", "The required lzcnt intrinisc is not supported.");
return RT_UNKNOWN_ERROR;
}
size_t budget = (size_t)rt_BufferMemoryBudget.i;
size_t block_count = budget / BLOCK_SIZE;
if ((budget % block_count) != 0) {
rtLog("BUFFERMGR",
"The configured buffer memory budget %zu is not dividable by the block size (4KB).",
budget);
}
size_t dword_count = (block_count + 31) / 32;
_block_count = block_count;
_memory = malloc(budget + dword_count * sizeof(uint32_t));
if (!_memory) {
return RT_OUT_OF_MEMORY;
}
_bitmap = (uint32_t *)(_memory + budget);
memset(_bitmap, 0, sizeof(uint32_t) * dword_count);
return RT_SUCCESS;
}
extern void ShutdownBufferManager(void) {
rtDestroyMutex(_guard);
}
/* Public API */
/* Reserves a contiguous range of pool blocks large enough for `size` bytes.
 *
 * The request is rounded up to whole BLOCK_SIZE blocks and served first-fit:
 * the bitmap is scanned from block 0 for the first run of free blocks that is
 * long enough, and that run is marked as allocated. The scan never looks past
 * _block_count, so bitmap padding bits are never handed out.
 *
 * Returns the address of the first reserved block, or NULL if no sufficiently
 * long free run exists (or the request exceeds the whole pool). A request of
 * 0 bytes reserves nothing and returns the pool base address.
 *
 * The bitmap is only read and modified while holding _guard. */
RT_DLLEXPORT void *rtAllocBuffer(size_t size) {
    /* Round up without the overflow (size + BLOCK_SIZE - 1) has near SIZE_MAX. */
    size_t alloc_blocks = size / BLOCK_SIZE + ((size % BLOCK_SIZE) != 0 ? 1u : 0u);
    if (alloc_blocks == 0) {
        return _memory;
    }
    if (alloc_blocks > _block_count) {
        return NULL;
    }

    void *result = NULL;
    size_t run_start = 0;  /* first block of the free run currently being tracked */
    size_t run_length = 0; /* number of consecutive free blocks seen so far */

    rtLockMutex(_guard);
    size_t block = 0;
    while (block < _block_count) {
        size_t bit = block % 32;
        uint32_t word = _bitmap[block / 32];

        /* These 32 blocks are all allocated. Go to the next dword. */
        if (bit == 0 && word == UINT32_MAX) {
            run_length = 0;
            block += 32;
            continue;
        }

        /* An allocated block ends the current run. */
        if ((word & (((uint32_t)1) << bit)) != 0) {
            run_length = 0;
            ++block;
            continue;
        }

        if (run_length == 0) {
            run_start = block;
        }
        ++run_length;
        ++block;

        if (run_length == alloc_blocks) {
            /* Mark exactly the blocks of this run; neighbouring bits stay as they are. */
            for (size_t j = run_start; j < block; ++j) {
                _bitmap[j / 32] |= ((uint32_t)1) << (j % 32);
            }
            result = _memory + run_start * BLOCK_SIZE;
            break;
        }
    }
    rtUnlockMutex(_guard);
    return result;
}
RT_DLLEXPORT void rtReleaseBuffer(const void *begin, size_t size) {
size_t alloc_blocks = (size + BLOCK_SIZE - 1) / BLOCK_SIZE;
uintptr_t off = (uintptr_t)begin - (uintptr_t)_memory;
uintptr_t first_block = off / BLOCK_SIZE;
rtLockMutex(_guard);
for (size_t i = first_block; i < first_block + alloc_blocks; ++i) {
size_t dword = i / 32;
size_t bit = i % 32;
_bitmap[dword] &= ~(1u << bit);
}
rtUnlockMutex(_guard);
}