From 3a9f9d4986dc11f5262a96ceff8e0bfc53c2f2db Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Wed, 31 Jan 2024 22:48:51 +0100 Subject: [PATCH] Write the first compiled resources to files --- contrib/stb_sprintf.h | 1910 +++++++++++++++++++++++++++ src/runtime/aio.c | 97 +- src/runtime/aio.h | 34 +- src/runtime/assert.c | 45 + src/runtime/asset_compiler.c | 122 +- src/runtime/asset_compiler.h | 20 +- src/runtime/asset_manager.c | 0 src/runtime/assets.h | 6 - src/runtime/buffer_manager.c | 250 +--- src/runtime/buffer_manager.h | 2 - src/runtime/ds.h | 59 + src/runtime/ds_minheap.c | 138 ++ src/runtime/dxc_shader_compiler.cpp | 11 +- src/runtime/error_report.c | 12 +- src/runtime/fsutils.c | 6 + src/runtime/fsutils.h | 2 + src/runtime/init.c | 14 + src/runtime/pipeline_processor.c | 253 +++- src/runtime/renderer_api.h | 29 +- src/runtime/resource_manager.c | 444 +++++++ src/runtime/resources.h | 72 + src/runtime/runtime.h | 47 +- src/runtime/shader_compiler.h | 15 +- src/runtime/sprint.c | 47 + 24 files changed, 3265 insertions(+), 370 deletions(-) create mode 100644 contrib/stb_sprintf.h create mode 100644 src/runtime/assert.c delete mode 100644 src/runtime/asset_manager.c delete mode 100644 src/runtime/assets.h create mode 100644 src/runtime/ds.h create mode 100644 src/runtime/ds_minheap.c create mode 100644 src/runtime/resource_manager.c create mode 100644 src/runtime/resources.h create mode 100644 src/runtime/sprint.c diff --git a/contrib/stb_sprintf.h b/contrib/stb_sprintf.h new file mode 100644 index 0000000..093415f --- /dev/null +++ b/contrib/stb_sprintf.h @@ -0,0 +1,1910 @@ +// stb_sprintf - v1.10 - public domain snprintf() implementation +// originally by Jeff Roberts / RAD Game Tools, 2015/10/20 +// http://github.com/nothings/stb +// +// allowed types: sc uidBboXx p AaGgEef n +// lengths : hh h ll j z t I64 I32 I +// +// Contributors: +// Fabian "ryg" Giesen (reformatting) +// github:aganm (attribute format) +// +// Contributors (bugfixes): +// 
github:d26435 +// github:trex78 +// github:account-login +// Jari Komppa (SI suffixes) +// Rohit Nirmal +// Marcin Wojdyr +// Leonard Ritter +// Stefano Zanotti +// Adam Allison +// Arvid Gerstmann +// Markus Kolb +// +// LICENSE: +// +// See end of file for license information. + +#ifndef STB_SPRINTF_H_INCLUDE +#define STB_SPRINTF_H_INCLUDE + +/* +Single file sprintf replacement. + +Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. +Hereby placed in public domain. + +This is a full sprintf replacement that supports everything that +the C runtime sprintfs support, including float/double, 64-bit integers, +hex floats, field parameters (%*.*d stuff), length read-backs, etc. + +Why would you need this if sprintf already exists? Well, first off, +it's *much* faster (see below). It's also much smaller than the CRT +versions code-space-wise. We've also added some simple improvements +that are super handy (commas in thousands, callbacks at buffer full, +for example). Finally, the format strings for MSVC and GCC differ +for 64-bit integers (among other small things), so this lets you use +the same format strings in cross platform code. + +It uses the standard single file trick of being both the header file +and the source itself. If you just include it normally, you just get +the header file function declarations. To get the code, you include +it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first. + +It only uses va_args macros from the C runtime to do its work. It +does cast doubles to S64s and shifts and divides U64s, which does +drag in CRT code on most platforms. + +It compiles to roughly 8K with float support, and 4K without. +As a comparison, when using MSVC static libs, calling sprintf drags +in 16K. + +API: +==== +int stbsp_sprintf( char * buf, char const * fmt, ... ) +int stbsp_snprintf( char * buf, int count, char const * fmt, ... ) + Convert an arg list into a buffer. 
stbsp_snprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintf( char * buf, char const * fmt, va_list va ) +int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va ) + Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va ) + typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len ); + Convert into a buffer, calling back every STB_SPRINTF_MIN chars. + Your callback can then copy the chars out, print them or whatever. + This function is actually the workhorse for everything else. + The buffer you pass in must hold at least STB_SPRINTF_MIN characters. + // you return the next buffer to use or 0 to stop converting + +void stbsp_set_separators( char comma, char period ) + Set the comma and period characters to use. + +FLOATS/DOUBLES: +=============== +This code uses an internal float->ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. + +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. + +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for intmax_t, size_t and ptrdiff_t (%jd %zd %td) as well. 
+ +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is +"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn +2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three +$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the +suffix, add "_" specifier: "%_$d" -> "2.53M". + +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 256 would print 100000000. + +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." ( 35.0x/32.5x faster!) 
+*/
+
+#if defined(__clang__) // STBSP__ASAN: attribute that switches off AddressSanitizer checks for one function
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer) // only define it when ASan is actually enabled
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#elif defined(_MSC_VER)
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __declspec(no_sanitize_address)
+ #endif
+#endif
+
+#ifndef STBSP__ASAN // nothing was selected above: make the marker expand to nothing
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC // PUBLICDEC decorates prototypes, PUBLICDEF decorates definitions
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute) // printf-style format checking where the compiler offers it
+ #if __has_attribute(format)
+  #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT // fallback: the annotation expands to nothing
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER // STBSP__NOTUSED(v): mention v in a void expression so it counts as used
+#define STBSP__NOTUSED(v) (void)(v)
+#else
+#define STBSP__NOTUSED(v) (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4); + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, 
stbsp__int32 *expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static char stbsp__comma = ','; +static struct +{ + short temp; // force next field to be 2-byte aligned + char pair[201]; +} stbsp__digitpair = +{ + 0, + "00010203040506070809101112131415161718192021222324" + "25262728293031323334353637383940414243444546474849" + "50515253545556575859606162636465666768697071727374" + "75767778798081828384858687888990919293949596979899" +}; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 +#define STBSP__METRIC_NOSPACE 1024 +#define STBSP__METRIC_1024 2048 +#define STBSP__METRIC_JEDEC 4096 + +static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) +{ + sign[0] = 0; + if (fl & STBSP__NEGATIVE) { + sign[0] = 1; + sign[1] = '-'; + } else if (fl & STBSP__LEADINGSPACE) { + sign[0] = 1; + sign[1] = ' '; + } else if (fl & STBSP__LEADINGPLUS) { + sign[0] = 1; + sign[1] = '+'; + } +} + +static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) +{ + char const * sn = s; + + // get up to 4-byte alignment + for (;;) { + if (((stbsp__uintptr)sn & 3) == 0) + break; + + if (!limit || *sn == 0) + return (stbsp__uint32)(sn - s); + + ++sn; + --limit; + } + + // scan over 4 bytes at a time to find terminating 0 + // this will intentionally scan up to 3 bytes past the end of buffers, + // but becase it works 4B aligned, it will never cross page boundaries + // (hence the STBSP__ASAN markup; the over-read here is intentional + // and harmless) + while (limit >= 4) { + stbsp__uint32 v = *(stbsp__uint32 *)sn; + // bit 
hack to find if there's a 0 byte in there + if ((v - 0x01010101) & (~v) & 0x80808080UL) + break; + + sn += 4; + limit -= 4; + } + + // handle the last few characters to find actual size + while (limit && *sn) { + ++sn; + --limit; + } + + return (stbsp__uint32)(sn - s); +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char *bf; + char const *f; + int tlen = 0; + + bf = buf; + f = fmt; + for (;;) { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + + // macros for the callback buffer stuff + #define stbsp__chk_cb_bufL(bytes) \ + { \ + int len = (int)(bf - buf); \ + if ((len + (bytes)) >= STB_SPRINTF_MIN) { \ + tlen += len; \ + if (0 == (bf = buf = callback(buf, user, len))) \ + goto done; \ + } \ + } + #define stbsp__chk_cb_buf(bytes) \ + { \ + if (callback) { \ + stbsp__chk_cb_bufL(bytes); \ + } \ + } + #define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ + } // flush if there is even one byte in the buffer + #define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if (callback) { \ + int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ + if (cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for (;;) { + while (((stbsp__uintptr)f) & 3) { + schk1: + if (f[0] == '%') + goto scandd; + schk2: + if (f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf(1); + *bf++ = f[0]; + ++f; + } + for (;;) { + // Check if the next 4 bytes contain %(0x25) or end of string. 
+ // Using the 'hasless' trick: + // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + stbsp__uint32 v, c; + v = *(stbsp__uint32 *)f; + c = (~v) & 0x80808080; + if (((v ^ 0x25252525) - 0x01010101) & c) + goto schk1; + if ((v - 0x01010101) & c) + goto schk2; + if (callback) + if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) + goto schk1; + #ifdef STB_SPRINTF_NOUNALIGNED + if(((stbsp__uintptr)bf) & 3) { + bf[0] = f[0]; + bf[1] = f[1]; + bf[2] = f[2]; + bf[3] = f[3]; + } else + #endif + { + *(stbsp__uint32 *)bf = v; + } + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for (;;) { + switch (f[0]) { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker (none->kilo->kibi->jedec) + case '$': + if (fl & STBSP__METRIC_SUFFIX) { + if (fl & STBSP__METRIC_1024) { + fl |= STBSP__METRIC_JEDEC; + } else { + fl |= STBSP__METRIC_1024; + } + } else { + fl |= STBSP__METRIC_SUFFIX; + } + ++f; + continue; + // if we don't want space between metric suffix and number + case '_': + fl |= STBSP__METRIC_NOSPACE; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: goto flags_done; + } + } + flags_done: + + // get the field width + if (f[0] == '*') { + fw = va_arg(va, stbsp__uint32); + ++f; + } else { + while ((f[0] >= '0') && (f[0] <= '9')) { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision + if (f[0] == '.') { + ++f; + if (f[0] == '*') { + pr = va_arg(va, stbsp__uint32); 
+ ++f; + } else { + pr = 0; + while ((f[0] >= '0') && (f[0] <= '9')) { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + if (f[0] == 'h') + ++f; // QUARTERWIDTH + break; + // are we 64-bit (unix style) + case 'l': + fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); + ++f; + if (f[0] == 'l') { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + case 't': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if ((f[1] == '6') && (f[2] == '4')) { + fl |= STBSP__INTMAX; + f += 3; + } else if ((f[1] == '3') && (f[2] == '2')) { + f += 3; + } else { + fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0); + ++f; + } + break; + default: break; + } + + // handle each replacement + switch (f[0]) { + #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char *s; + char const *h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const *sn; + + case 's': + // get the string + s = va_arg(va, char *); + if (s == 0) + s = (char *)"null"; + // get the length, limited to desired precision + // always limit to ~0u chars since our counts are 32b + l = stbsp__strlen_limited(s, (pr >= 0) ? 
pr : ~0u); + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char)va_arg(va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int *d = va_arg(va, int *); + *d = tlen + (int)(bf - buf); + } break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg(va, double); // eat it + s = (char *)"No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + cs = 0; + STBSP__NOTUSED(dp); + goto scopy; +#else + case 'A': // hex float + case 'a': // hex float + h = (f[0] == 'A') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + stbsp__lead_sign(fl, lead); + + if (dp == -1023) + dp = (n64) ? -1022 : 0; + else + n64 |= (((stbsp__uint64)1) << 52); + n64 <<= (64 - 56); + if (pr < 15) + n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); +// add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if (pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if (n > 13) + n = 13; + if (pr > (stbsp__int32)n) + tz = pr - n; + pr = 0; + while (n--) { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; + n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 
4 : 3)); + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int)(s - sn); + l = (int)(s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + case 'g': // float + h = (f[0] == 'G') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; + else if (pr == 0) + pr = 1; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if (l > (stbsp__uint32)pr) + l = pr; + while ((l > 1) && (pr) && (sn[l - 1] == '0')) { + --pr; + --l; + } + + // should we use %e + if ((dp <= -4) || (dp > (stbsp__int32)n)) { + if (pr > (stbsp__int32)l) + pr = l - 1; + else if (pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g semantics for %f + if (dp > 0) { + pr = (dp < (stbsp__int32)l) ? l - dp : 0; + } else { + pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); + } + goto dofloatfromg; + + case 'E': // float + case 'e': // float + h = (f[0] == 'E') ? 
hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if (pr) + *s++ = stbsp__period; + + // handle after decimal + if ((l - 1) > (stbsp__uint32)pr) + l = pr + 1; + for (n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 5 : 4; +#endif + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg(va, double); + doafloat: + // do kilos + if (fl & STBSP__METRIC_SUFFIX) { + double divisor; + divisor = 1000.0f; + if (fl & STBSP__METRIC_1024) + divisor = 1024.0; + while (fl < 0x4000000) { + if ((fv < divisor) && (fv > -divisor)) + break; + fv /= divisor; + fl += 0x1000000; + } + } + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if (dp <= 0) { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if (pr) + *s++ = stbsp__period; + n = -dp; + if ((stbsp__int32)n > pr) + n = pr; + i = n; + while (i) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + i -= 4; + } + while 
(i) { + *s++ = '0'; + --i; + } + if ((stbsp__int32)(l + n) > pr) + l = pr - n; + i = l; + while (i) { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } else { + cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0; + if ((stbsp__uint32)dp >= l) { + // handle xxxx000*000.0 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= l) + break; + } + } + if (n < (stbsp__uint32)dp) { + n = dp - n; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (n) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while (n >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + n -= 4; + } + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = '0'; + --n; + } + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) { + *s++ = stbsp__period; + tz = pr; + } + } else { + // handle xxxxx.xxxx000*000 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= (stbsp__uint32)dp) + break; + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + *s++ = stbsp__period; + if ((l - dp) > (stbsp__uint32)pr) + l = pr + dp; + while (n < l) { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if (fl & STBSP__METRIC_SUFFIX) { + char idx; + idx = 1; + if (fl & STBSP__METRIC_NOSPACE) + idx = 0; + tail[0] = idx; + tail[1] = ' '; + { + if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. + if (fl & STBSP__METRIC_1024) + tail[idx + 1] = "_KMGT"[fl >> 24]; + else + tail[idx + 1] = "_kMGT"[fl >> 24]; + idx++; + // If printing kibits and not in jedec, add the 'i'. 
+ if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { + tail[idx + 1] = 'i'; + idx++; + } + tail[0] = idx; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32)(s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + case 'b': // lower binary + h = (f[0] == 'B') ? hexu : hex; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; + pr = sizeof(void *) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // fall through - to X + + case 'X': // upper hex + case 'x': // lower hex + h = (f[0] == 'X') ? hexu : hex; + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if (fl & STBSP__INTMAX) + n64 = va_arg(va, stbsp__uint64); + else + n64 = va_arg(va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if (n64 == 0) { + lead[0] = 0; + if (pr == 0) { + l = 0; + cs = 0; + goto scopy; + } + } + // convert to string + for (;;) { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) + break; + if (fl & STBSP__TRIPLET_COMMA) { + ++l; + if ((l & 15) == ((l >> 4) & 15)) { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get 
the integer and abs it + if (fl & STBSP__INTMAX) { + stbsp__int64 i64 = va_arg(va, stbsp__int64); + n64 = (stbsp__uint64)i64; + if ((f[0] != 'u') && (i64 < 0)) { + n64 = (stbsp__uint64)-i64; + fl |= STBSP__NEGATIVE; + } + } else { + stbsp__int32 i = va_arg(va, stbsp__int32); + n64 = (stbsp__uint32)i; + if ((f[0] != 'u') && (i < 0)) { + n64 = (stbsp__uint32)-i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if (fl & STBSP__METRIC_SUFFIX) { + if (n64 < 1024) + pr = 0; + else if (pr == -1) + pr = 1; + fv = (double)(stbsp__int64)n64; + goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for (;;) { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char *o = s - 8; + if (n64 >= 100000000) { + n = (stbsp__uint32)(n64 % 100000000); + n64 /= 100000000; + } else { + n = (stbsp__uint32)n64; + n64 = 0; + } + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + do { + s -= 2; + *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + } while (n); + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = (char)(n % 10) + '0'; + n /= 10; + } + } + if (n64 == 0) { + if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while (s != o) + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = '0'; + } + } + + tail[0] = 0; + stbsp__lead_sign(fl, lead); + + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + if (l == 0) { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if (pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if (pr < (stbsp__int32)l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if (fw < (stbsp__int32)n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if ((fl & STBSP__LEFTJUST) == 0) { + if (fl & 
STBSP__LEADINGZERO) // if leading zeros, everything is in pr + { + pr = (fw > pr) ? fw : pr; + fw = 0; + } else { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if (fw + pr) { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if ((fl & STBSP__LEFTJUST) == 0) + while (fw > 0) { + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leader + sn = lead + 1; + while (lead[0]) { + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; + while (pr > 0) { + stbsp__cb_buf_clamp(i, pr); + pr -= i; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while (i) { + if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { + cs = 0; + *bf++ = stbsp__comma; + } else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while (lead[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy the string + n = l; + while (n) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, n); + n -= i; + STBSP__UNALIGNED(while (i >= 4) { + *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; + bf += 4; + s += 4; + i -= 4; + }) + while (i) { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy trailing zeros 
+ while (tz) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tz); + tz -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy tail if there is one + sn = tail + 1; + while (tail[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tail[0]); + tail[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // handle the left justify + if (fl & STBSP__LEFTJUST) + if (fw > 0) { + while (fw) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i--) + *bf++ = ' '; + stbsp__chk_cb_buf(1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if (!callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int)(bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// ============================================================================ +// wrapper functions + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) 
+{ + int result; + va_list va; + va_start(va, fmt); + result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); + va_end(va); + return result; +} + +typedef struct stbsp__context { + char *buf; + int count; + int length; + char tmp[STB_SPRINTF_MIN]; +} stbsp__context; + +static char *stbsp__clamp_callback(const char *buf, void *user, int len) +{ + stbsp__context *c = (stbsp__context *)user; + c->length += len; + + if (len > c->count) + len = c->count; + + if (len) { + if (buf != c->buf) { + const char *s, *se; + char *d; + d = c->buf; + s = buf; + se = buf + len; + do { + *d++ = *s++; + } while (s < se); + } + c->buf += len; + c->count -= len; + } + + if (c->count <= 0) + return c->tmp; + return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can +} + +static char * stbsp__count_clamp_callback( const char * buf, void * user, int len ) +{ + stbsp__context * c = (stbsp__context*)user; + (void) sizeof(buf); + + c->length += len; + return c->tmp; // go direct into buffer if you can +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va ) +{ + stbsp__context c; + + if ( (count == 0) && !buf ) + { + c.length = 0; + + STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va ); + } + else + { + int l; + + c.buf = buf; + c.count = count; + c.length = 0; + + STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va ); + + // zero-terminate + l = (int)( c.buf - buf ); + if ( l >= count ) // should never be greater, only equal (or less) than count + l = count - 1; + buf[l] = 0; + } + + return c.length; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) 
+{ + int result; + va_list va; + va_start(va, fmt); + + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); + va_end(va); + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) +{ + return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for (cn = 0; cn < 8; cn++) \ + ((char *)&dest)[cn] = ((char *)&src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP(b, d); + + *bits = b & ((((stbsp__uint64)1) << 52) - 1); + *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); + + return (stbsp__int32)((stbsp__uint64) b >> 63); +} + +static double const stbsp__bot[23] = { + 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, + 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 +}; +static double const stbsp__negbot[22] = { + 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, + 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 +}; +static double const stbsp__negboterr[22] = { + -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, + 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, + -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, 
-7.1542424054621921e-034, -7.1542424054621926e-035, + 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 +}; +static double const stbsp__top[13] = { + 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 +}; +static double const stbsp__negtop[13] = { + 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 +}; +static double const stbsp__toperr[13] = { + 8388608, + 6.8601809640529717e+028, + -7.253143638152921e+052, + -4.3377296974619174e+075, + -1.5559416129466825e+098, + -3.2841562489204913e+121, + -3.7745893248228135e+144, + -1.7356668416969134e+167, + -3.8893577551088374e+190, + -9.9566444326005119e+213, + 6.3641293062232429e+236, + -5.2069140800249813e+259, + -5.2504760255204387e+282 +}; +static double const stbsp__negtoperr[13] = { + 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, + -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, + 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, + 8.0970921678014997e-317 +}; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000, + 10000000000000000000U +}; +#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 
// Multiplies d by 10^power using the power-of-ten tables and stores the result
// as a pair of doubles: *ohi is the leading part, *olo the low-order remainder
// produced by the stbsp__dd* helper macros.
// power can be -323 to +350
// NOTE(review): the *err tables are only ever folded into the low word; they
// look like the representation error of the matching table entry - confirm
// against upstream stb_sprintf before relying on that.
static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power)
{
   double ph, pl;
   if ((power >= 0) && (power <= 22)) {
      // small non-negative powers: a single multiply by 1e0..1e22
      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
   } else {
      stbsp__int32 e, et, eb;
      double p2h, p2l;

      // split |power| into et * 23 + eb:
      //   et selects an entry of the 13-element top/negtop tables (steps of 1e23),
      //   eb is what remains for the bot/negbot tables
      e = power;
      if (power < 0)
         e = -e;
      et = (e * 0x2c9) >> 14; // 0x2c9 / 2^14 is roughly 1/23, so this approximates e / 23
      if (et > 13)
         et = 13; // top/negtop only have 13 entries; the excess stays in eb
      eb = e - (et * 23);

      ph = d;
      pl = 0.0;
      if (power < 0) {
         if (eb) {
            // negbot[k] holds 1e-(k+1), hence the decrement before indexing
            --eb;
            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
         }
         if (et) {
            stbsp__ddrenorm(ph, pl);
            // negtop[k] holds 1e-(23 * (k+1))
            --et;
            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
            ph = p2h;
            pl = p2l;
         }
      } else {
         if (eb) {
            // eb can exceed 22 once et was clamped above; bot[] stops at 1e22,
            // so apply 1e22 first and the leftover power (kept in e) second
            e = eb;
            if (eb > 22)
               eb = 22;
            e -= eb;
            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
            if (e) {
               stbsp__ddrenorm(ph, pl);
               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
               ph = p2h;
               pl = p2l;
            }
         }
         if (et) {
            stbsp__ddrenorm(ph, pl);
            // top[k] holds 1e(23 * (k+1))
            --et;
            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
            ph = p2h;
            pl = p2l;
         }
      }
   }
   // fold the low word back into the high word before handing the pair out
   stbsp__ddrenorm(ph, pl);
   *ohi = ph;
   *olo = pl;
}
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); + + // move the significant bits into position and stick them into an int + stbsp__raise_to_power10(&ph, &pl, d, 18 - tens); + + // get full as much precision from double-double as possible + stbsp__ddtoS64(bits, ph, pl); + + // check if we undershot + if (((stbsp__uint64)bits) >= stbsp__tento19th) + ++tens; + } + + // now do the rounding in integer land + frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); + if ((frac_digits < 24)) { + stbsp__uint32 dg = 1; + if ((stbsp__uint64)bits >= stbsp__powten[9]) + dg = 10; + while ((stbsp__uint64)bits >= stbsp__powten[dg]) { + ++dg; + if (dg == 20) + goto noround; + } + if (frac_digits < dg) { + stbsp__uint64 r; + // add 0.5 at the right position and round + e = dg - frac_digits; + if ((stbsp__uint32)e >= 24) + goto noround; + r = stbsp__powten[e]; + bits = bits + (r / 2); + if ((stbsp__uint64)bits >= stbsp__powten[dg]) + ++tens; + bits /= r; + } + noround:; + } + + // kill long trailing runs of zeros + if (bits) { + stbsp__uint32 n; + for (;;) { + if (bits <= 0xffffffff) + break; + if (bits % 1000) + goto donez; + bits /= 1000; + } + n = (stbsp__uint32)bits; + while ((n % 1000) == 0) + n /= 1000; + bits = n; + donez:; + } + + // convert to string + out += 64; + e = 0; + for (;;) { + stbsp__uint32 n; + char *o = out - 8; + // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned) + if (bits >= 100000000) { + n = (stbsp__uint32)(bits % 100000000); + bits /= 100000000; + } else { + n = (stbsp__uint32)bits; + bits = 0; + } + while (n) { + out -= 2; + *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + e += 2; + } + if (bits == 0) { + if ((e) && (out[0] == '0')) { + ++out; + --e; + } + break; + } + while (out != o) { + *--out = '0'; + ++e; + } + } + + *decimal_pos = tens; + *start = out; + *len = e; + return ng; +} + +#undef 
stbsp__ddmulthi +#undef stbsp__ddrenorm +#undef stbsp__ddmultlo +#undef stbsp__ddmultlos +#undef STBSP__SPECIAL +#undef STBSP__COPYFP + +#endif // STB_SPRINTF_NOFLOAT + +// clean up +#undef stbsp__uint16 +#undef stbsp__uint32 +#undef stbsp__int32 +#undef stbsp__uint64 +#undef stbsp__int64 +#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ diff --git a/src/runtime/aio.c b/src/runtime/aio.c index ec5dd45..3b6beb5 100644 --- a/src/runtime/aio.c +++ b/src/runtime/aio.c @@ -1,6 +1,6 @@ #include "aio.h" -#include "threading.h" #include "config.h" +#include "threading.h" #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN @@ -111,7 +111,7 @@ RT_CVAR_I(rt_MaxConcurrentAsyncIO, rt_result InitAIO(void) { unsigned int max_concurrent_operations = rt_MaxConcurrentAsyncIO.i; - _ringbuffer.guard = rtCreateMutex(); + _ringbuffer.guard = rtCreateMutex(); if (!_ringbuffer.guard) { return RT_AIO_OUT_OF_MEMORY; } @@ -223,6 +223,96 @@ RT_DLLEXPORT rt_result rtSubmitLoadBatch(const rt_load_batch *batch, rt_aio_hand return RT_SUCCESS; } +RT_DLLEXPORT rt_result rtSubmitWriteBatch(const rt_write_batch *batch, rt_aio_handle *handles) { + if (batch->num_writes > RT_LOAD_BATCH_MAX_SIZE) { + return RT_AIO_WRITE_TOO_LARGE; + } + + rt_ringbuffer_space rbspace = ReserveRingbufferSpace(batch->num_writes); + if (!rbspace.a) { + rtReportError("aio", "Too many pending file operations"); + return RT_AIO_TOO_MANY_OPERATIONS; + } + + for (unsigned int i = 0; i < batch->num_writes; ++i) { + rt_aio *op = (i < rbspace.a_count) ? &rbspace.a[i] : &rbspace.b[i - rbspace.a_count]; + op->state = RT_AIO_STATE_PENDING; + const char *file_path = rtGetFilePath(batch->writes[i].file); + if (!file_path) { + rtReportError("aio", "Failed to resolve file path for a batched write"); + op->state = RT_AIO_STATE_INVALID; + handles[i] = RT_AIO_INVALID_HANDLE; + continue; + } +#ifdef _WIN32 + op->overlapped = (OVERLAPPED){ + /* ReadFileEx does not use hEvent and we are free to use it for our own purposes. 
*/ + .hEvent = (HANDLE)(op), + .Internal = 0, + .InternalHigh = 0, + .Offset = (DWORD)(batch->writes[i].offset & MAXDWORD), + .OffsetHigh = (DWORD)(batch->writes[i].offset >> 32), + }; + + WCHAR wpath[MAX_PATH]; + if (MultiByteToWideChar(CP_UTF8, + MB_PRECOMPOSED, + file_path, + -1, + wpath, + RT_ARRAY_COUNT(wpath)) == 0) { + rtReportError("aio", "MultiByteToWideChar failed with error code: %u", GetLastError()); + op->state = RT_AIO_STATE_FINISHED; + handles[i] = RT_AIO_INVALID_HANDLE; + continue; + } + + HANDLE file_handle = CreateFileW(wpath, + GENERIC_WRITE, + 0, + NULL, + OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, + NULL); + if (file_handle == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + char error_msg[256]; + Win32ErrorToString(err, error_msg, 256); + rtReportError("aio", + "CreateFileW failed for file: %s with error code: %u (%s)", + file_path, + err, + error_msg); + op->state = RT_AIO_STATE_INVALID; + handles[i] = RT_AIO_INVALID_HANDLE; + continue; + } + op->file_handle = file_handle; + BOOL result = WriteFileEx(file_handle, + batch->writes[i].buffer, + (DWORD)batch->writes[i].num_bytes, + &op->overlapped, + win32CompletionRoutine); + DWORD err = GetLastError(); + if (!result || (err != ERROR_SUCCESS && err != ERROR_ALREADY_EXISTS)) { + char error_msg[256]; + Win32ErrorToString(err, error_msg, 256); + rtReportError("aio", "WriteFileEx failed with error code: %u (%s)", err, error_msg); + op->state = RT_AIO_STATE_FINISHED; + handles[i] = RT_AIO_INVALID_HANDLE; + CloseHandle(file_handle); + op->file_handle = NULL; + } + + /* Handle is the index into the ringbuffer + 1 */ + ptrdiff_t op_idx = op - _ringbuffer.storage; + handles[i] = (uint32_t)op_idx + 1; +#endif + } + + return RT_SUCCESS; +} + RT_DLLEXPORT volatile rt_aio_state rtGetAIOState(rt_aio_handle handle) { if (handle == RT_AIO_INVALID_HANDLE || handle > _ringbuffer.capacity) return RT_AIO_STATE_INVALID; @@ -270,10 +360,9 @@ RT_DLLEXPORT rt_aio_state 
rtWaitForAIOCompletion(rt_aio_handle handle) { return state; } - RT_DLLEXPORT rt_result rtSubmitSingleLoad(rt_file_load load, rt_aio_handle *handle) { rt_load_batch batch; - batch.loads[0] = load; + batch.loads[0] = load; batch.num_loads = 1; return rtSubmitLoadBatch(&batch, handle); } diff --git a/src/runtime/aio.h b/src/runtime/aio.h index da62257..7b95e53 100644 --- a/src/runtime/aio.h +++ b/src/runtime/aio.h @@ -33,13 +33,35 @@ typedef struct { unsigned int num_loads; } rt_load_batch; +typedef struct { + size_t num_bytes; /** Number of bytes to write */ + size_t offset; /** Offset at which to start writing */ + /* Source buffer with at least num_bytes bytes. + * Must be valid until the write is finished. + */ + const void *buffer; + rt_file_id file; +} rt_file_write; + +#define RT_WRITE_OFFSET_APPEND ((size_t)-1) + +#define RT_WRITE_BATCH_MAX_SIZE 64 + +/* A batch of writes that will be started together. + * The aio system will hand these to the OS. */ +typedef struct { + rt_file_write writes[RT_WRITE_BATCH_MAX_SIZE]; + unsigned int num_writes; +} rt_write_batch; + #define RT_AIO_INVALID_HANDLE 0 /** Handle for an async io operation. Can be used to query the state and result. */ typedef uint32_t rt_aio_handle; enum { - RT_AIO_LOAD_TOO_LARGE = (RT_SUCCESS + 1), + RT_AIO_LOAD_TOO_LARGE = RT_CUSTOM_ERROR_START, + RT_AIO_WRITE_TOO_LARGE, RT_AIO_TOO_MANY_OPERATIONS, RT_AIO_OUT_OF_MEMORY, }; @@ -51,14 +73,16 @@ typedef enum { RT_AIO_STATE_FAILED, } rt_aio_state; -RT_DLLEXPORT rt_result rtSubmitLoadBatch(const rt_load_batch *batch, rt_aio_handle *handles); - RT_DLLEXPORT volatile rt_aio_state rtGetAIOState(rt_aio_handle handle); /* Blocks until the given operation is no longer pending. - * Returns the state that caused the wait to end. The handle is still valid after this function returned. */ + * Returns the state that caused the wait to end. The handle is still valid after this function + * returned. 
*/ RT_DLLEXPORT rt_aio_state rtWaitForAIOCompletion(rt_aio_handle handle); + +RT_DLLEXPORT rt_result rtSubmitLoadBatch(const rt_load_batch *batch, rt_aio_handle *handles); + /* Releases the internal storage for the operation. * The system is allowed to re-use the same handle value for new operations after this was called. */ @@ -70,6 +94,8 @@ RT_DLLEXPORT rt_result rtSubmitSingleLoad(rt_file_load load, rt_aio_handle *hand * Returns the state that caused the wait for completion to return. */ RT_DLLEXPORT rt_aio_state rtSubmitSingleLoadSync(rt_file_load load); +RT_DLLEXPORT rt_result rtSubmitWriteBatch(const rt_write_batch *batch, rt_aio_handle *handles); + #ifdef __cplusplus } #endif diff --git a/src/runtime/assert.c b/src/runtime/assert.c new file mode 100644 index 0000000..afd6d53 --- /dev/null +++ b/src/runtime/assert.c @@ -0,0 +1,45 @@ +#include "runtime.h" +#include "config.h" + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + +#include + +RT_CVAR_I(rt_AssertEnabled, "Enables or disables asserts in non-release builds. 
Default: 1", 1); + +#define ASSERT_HANDLER_DBGBREAK 0 +#define ASSERT_HANDLER_CONTINUE 1 + +RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char *file, int line) { + if (!rt_AssertEnabled.i) + return ASSERT_HANDLER_CONTINUE; + rtLog("ASSERT", "[%s:%d] Assertion (%s) failed: %s", file, line, expr, msg); +#ifdef _WIN32 + char outmessage[512]; + snprintf(outmessage, + 511, + "Assertion failed: %s\nMessage: %s\n%s:%d\nPress \"Yes\" to debug-break, \"No\" to " + "continue with asserts enabled or \"Cancel\" to disable asserts.", + expr, + msg, + file, + line); + outmessage[511] = '\0'; + + + DWORD action = MessageBoxA(NULL, outmessage, "Assertion Failed", MB_YESNOCANCEL | MB_ICONERROR); + if (action == IDYES) { + return ASSERT_HANDLER_DBGBREAK; + } else if (action == IDCANCEL) { + rt_AssertEnabled.i = 0; + } else if (action != IDNO) { + rtReportError("CORE", "MessageBoxA for a failed assertion failed."); + __debugbreak(); + ExitProcess(1); + } +#endif + return ASSERT_HANDLER_CONTINUE; +} \ No newline at end of file diff --git a/src/runtime/asset_compiler.c b/src/runtime/asset_compiler.c index 6a8df6f..d4a798c 100644 --- a/src/runtime/asset_compiler.c +++ b/src/runtime/asset_compiler.c @@ -1,15 +1,17 @@ +#include "asset_compiler.h" +#include "buffer_manager.h" +#include "config.h" +#include "file_tab.h" +#include "fsutils.h" +#include "mem_arena.h" +#include "resources.h" #include "runtime.h" #include "threading.h" -#include "config.h" -#include "fsutils.h" -#include "file_tab.h" -#include "mem_arena.h" -#include "buffer_manager.h" -#include #include -#include #include +#include +#include #ifndef RT_BUILD_ASSET_COMPILER #error This should only be built when RT_BUILD_ASSET_COMPILER is defined. 
@@ -17,15 +19,17 @@ typedef struct { uint64_t last_processed; + rt_resource_id resources[RT_MAX_RESOURCES_PER_ASSET]; + unsigned int resource_count; + bool in_processing; } rt_asset_data; typedef struct { rt_file_id *files; rt_asset_data *data; + rt_rwlock lock; } rt_asset_db; -typedef rt_result rt_asset_processor_fn(rt_file_id file, rt_arena *arena); - typedef struct { const char *file_ext; rt_asset_processor_fn *proc; @@ -60,7 +64,7 @@ static rt_asset_db _asset_db; static rt_processing_queue _processing_queue; -extern rt_result PipelineProcessor(rt_file_id file, rt_arena *arena); +extern RT_ASSET_PROCESSOR_FN(PipelineProcessor); static rt_asset_processor _processors[] = { {.file_ext = ".pipeline", .proc = PipelineProcessor} @@ -78,6 +82,12 @@ rt_result InitAssetCompiler(void) { _asset_db.files = mem; _asset_db.data = (rt_asset_data *)(_asset_db.files + db_size); memset(mem, 0, (sizeof(rt_file_id) + sizeof(rt_asset_data)) * db_size); + rt_create_rwlock_result lock_create = rtCreateRWLock(); + if (!lock_create.ok) { + free(mem); + return RT_UNKNOWN_ERROR; + } + _asset_db.lock = lock_create.lock; _processing_queue.lock = rtCreateConditionVar(); if (!_processing_queue.lock) { @@ -94,7 +104,7 @@ rt_result InitAssetCompiler(void) { rt_AssetProcessingThreads.i = MAX_PROCESSING_THREADS; for (int i = 0; i < rt_AssetProcessingThreads.i; ++i) { char name[64]; - snprintf(name, 64, "AssetProcessorThread %d", i); + rtSPrint(name, 64, "AssetProcessorThread %d", i); _processing_threads[i] = rtSpawnThread(ProcessorThreadEntry, NULL, name); if (!_processing_threads[i]) { /* Wake the processing threads */ @@ -120,12 +130,13 @@ void ShutdownAssetCompiler(void) { rtJoinThread(_processing_threads[i]); free(_asset_db.files); rtDestroyConditionVar(_processing_queue.lock); + rtDestroyRWLock(&_asset_db.lock); } static int DiscoverAssets(void) { /* Recursive descend into the asset directory */ #define MAX_DISCOVERY_DEPTH 64 -#define MAX_FILENAME_LEN 260 +#define MAX_FILENAME_LEN 260 static 
char directory_stack[MAX_DISCOVERY_DEPTH][MAX_FILENAME_LEN]; static unsigned int path_lens[MAX_DISCOVERY_DEPTH]; unsigned int top = 0; @@ -136,7 +147,7 @@ static int DiscoverAssets(void) { : MAX_FILENAME_LEN); directory_stack[0][MAX_FILENAME_LEN - 1] = '\0'; ++top; - + int discovery_count = 0; while (top > 0) { @@ -154,7 +165,7 @@ static int DiscoverAssets(void) { rt_dirent entry; do { entry = rtNextDirectoryEntry(scan); - + if (entry.name[0] == '.') continue; @@ -187,20 +198,29 @@ static int DiscoverAssets(void) { rt_file_id fid = rtAddFile(file); unsigned int i = 0; + rtLockWrite(&_asset_db.lock); while (i < (unsigned int)rt_AssetDBSize.i) { unsigned int slot = (fid + i) % (unsigned int)rt_AssetDBSize.i; if (_asset_db.files[slot] == fid) { break; } else if (_asset_db.files[slot] == 0) { - _asset_db.files[slot] = fid; + _asset_db.files[slot] = fid; _asset_db.data[slot].last_processed = 0; + memset(&_asset_db.data[slot].resources, + 0, + sizeof(_asset_db.data[slot].resources)); + _asset_db.data[slot].resource_count = 0; + _asset_db.data[slot].in_processing = false; ++discovery_count; break; } ++i; } + rtUnlockWrite(&_asset_db.lock); if (i == (unsigned int)rt_AssetDBSize.i) { - rtLog("AC", "Failed to add %s to AssetDB, because no free slots are left.", file); + rtLog("AC", + "Failed to add %s to AssetDB, because no free slots are left.", + file); } } } while (!entry.is_last); @@ -214,24 +234,47 @@ static int DiscoverAssets(void) { static int CheckUpdatedAssets(void) { int updated_count = 0; for (int i = 0; i < rt_AssetDBSize.i; ++i) { - if (_asset_db.files[i] == 0) + rtLockRead(&_asset_db.lock); + if (_asset_db.files[i] == 0) { + rtUnlockRead(&_asset_db.lock); continue; - const char *path = rtGetFilePath(_asset_db.files[i]); + } + const char *path = rtGetFilePath(_asset_db.files[i]); uint64_t last_changed = rtGetFileModificationTimestamp(path); - if (_asset_db.data[i].last_processed < last_changed) { + if (!_asset_db.data[i].in_processing && 
_asset_db.data[i].last_processed < last_changed) { + + /* Check that we have not already added this file */ + rtLockConditionVar(_processing_queue.lock); + bool already_in_queue = false; + for (size_t entry_idx = _processing_queue.head; entry_idx != _processing_queue.tail; + entry_idx = (entry_idx + 1) % RT_ARRAY_COUNT(_processing_queue.entries)) { + if (_processing_queue.entries[entry_idx].fid == _asset_db.files[i]) { + already_in_queue = true; + break; + } + } + rtUnlockConditionVar(_processing_queue.lock, false); + if (already_in_queue) { + rtUnlockRead(&_asset_db.lock); + continue; + } + const char *ext = path + strlen(path); while (*ext != '.' && ext != path) --ext; if (*ext != '.') break; + bool found_processor = false; for (unsigned int j = 0; j < RT_ARRAY_COUNT(_processors); ++j) { if (strcmp(ext, _processors[j].file_ext) == 0) { rt_processing_queue_entry entry; entry.fid = _asset_db.files[i]; entry.processor_index = j; entry.db_index = i; - + + found_processor = true; + while (true) { bool inserted = false; rtLockConditionVar(_processing_queue.lock); @@ -240,7 +283,7 @@ static int CheckUpdatedAssets(void) { if (next_tail != _processing_queue.head) { _processing_queue.entries[_processing_queue.tail] = entry; _processing_queue.tail = next_tail; - inserted = true; + inserted = true; } rtUnlockConditionVar(_processing_queue.lock, inserted); if (inserted) @@ -248,8 +291,10 @@ static int CheckUpdatedAssets(void) { } } } - ++updated_count; + if (found_processor) + ++updated_count; } + rtUnlockRead(&_asset_db.lock); } return updated_count; } @@ -285,7 +330,7 @@ static void ProcessorThreadEntry(void *param) { while (_keep_running && (_processing_queue.tail == _processing_queue.head)) rtWaitOnConditionVar(_processing_queue.lock); - bool got_entry = false; + bool got_entry = false; rt_processing_queue_entry entry = {0}; if (_processing_queue.tail != _processing_queue.head) { entry = _processing_queue.entries[_processing_queue.head]; @@ -299,24 +344,47 @@ static void 
ProcessorThreadEntry(void *param) { if (!got_entry) continue; + rtLockWrite(&_asset_db.lock); + _asset_db.data[entry.db_index].in_processing = true; + rtUnlockWrite(&_asset_db.lock); + const char *path = rtGetFilePath(entry.fid); rtLog("AC", "Processing %s", path); rtArenaClear(&arena); - rt_result res = _processors[entry.processor_index].proc(entry.fid, &arena); + rt_resource_id existing_resources[RT_MAX_RESOURCES_PER_ASSET]; + unsigned int existing_resource_count; + rtLockRead(&_asset_db.lock); + memcpy(existing_resources, + _asset_db.data[entry.db_index].resources, + sizeof(existing_resources)); + existing_resource_count = _asset_db.data[entry.db_index].resource_count; + rtUnlockRead(&_asset_db.lock); + rt_resource_id new_resources[RT_MAX_RESOURCES_PER_ASSET]; + memset(&new_resources, 0, sizeof(new_resources)); + unsigned int new_resource_count = 0; + rt_result res = _processors[entry.processor_index].proc(entry.fid, + existing_resource_count, + existing_resources, + &new_resource_count, + new_resources, + &arena); if (res != RT_SUCCESS) { rtLog("AC", "Failed to process %s: %u", path, res); } + rtLockWrite(&_asset_db.lock); _asset_db.data[entry.db_index].last_processed = rtGetCurrentTimestamp(); + _asset_db.data[entry.db_index].in_processing = false; + memcpy(_asset_db.data[entry.db_index].resources, new_resources, sizeof(new_resources)); + _asset_db.data[entry.db_index].resource_count = new_resource_count; + rtUnlockWrite(&_asset_db.lock); } } /* Utilities for asset processors*/ #include "aio.h" -#include "asset_compiler.h" -rt_loaded_asset LoadAsset(rt_file_id file) -{ +rt_loaded_asset LoadAsset(rt_file_id file) { const char *path = rtGetFilePath(file); size_t file_size = rtGetFileSize(path); diff --git a/src/runtime/asset_compiler.h b/src/runtime/asset_compiler.h index 86f423e..3b470d3 100644 --- a/src/runtime/asset_compiler.h +++ b/src/runtime/asset_compiler.h @@ -2,10 +2,12 @@ #define RT_ASSET_COMPILER_H #ifndef RT_BUILD_ASSET_COMPILER -#error This file 
should only be included if RT_BUILD_ASSET_COMPILER is defined. +#error This file should only be included if RT_BUILD_ASSET_COMPILER is defined. #endif #include "file_tab.h" +#include "resources.h" +#include "mem_arena.h" #ifdef __cplusplus extern "C" { @@ -15,6 +17,22 @@ enum { RT_ASSET_PROCESSING_FAILED = RT_CUSTOM_ERROR_START, }; +#define RT_MAX_RESOURCES_PER_ASSET 32 + +/* Asset processor prototype. + * + * The new resources will replace the associated resources completely. + */ +#define RT_ASSET_PROCESSOR_FN(_Name) \ + rt_result _Name(rt_file_id file, \ + unsigned int existing_resource_count, \ + const rt_resource_id *existing_resources, \ + unsigned int *new_resource_count, \ + rt_resource_id *new_resources, \ + rt_arena *arena) +/* A Asset processor function */ +typedef RT_ASSET_PROCESSOR_FN(rt_asset_processor_fn); + /* Allocated from the buffer manager */ typedef struct { void *buffer; diff --git a/src/runtime/asset_manager.c b/src/runtime/asset_manager.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/runtime/assets.h b/src/runtime/assets.h deleted file mode 100644 index e11d5c7..0000000 --- a/src/runtime/assets.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef RT_ASSETS_H -#define RT_ASSETS_H - -/* Asset system interface */ - -#endif diff --git a/src/runtime/buffer_manager.c b/src/runtime/buffer_manager.c index 5980f77..175fe2d 100644 --- a/src/runtime/buffer_manager.c +++ b/src/runtime/buffer_manager.c @@ -9,16 +9,6 @@ #include #include -#if 0 -typedef struct rt_buffer_region_s { - void *memory; - int16_t *refcounts; // One per block - uint32_t *bitmap; - size_t block_count; - rt_mutex *guard; -} rt_buffer_region; -#endif - /* Count leading zeroes. * Note that the return value of __builtin_clz(0) is undefined. */ #ifdef _MSC_VER @@ -51,222 +41,8 @@ static __forceinline bool IsLZCNTSupported(void) { #endif -#if 0 -/* NOTE(Kevin): Keep these sorted! 
*/ -static size_t _block_sizes[] = {RT_KB(512), RT_MB(1), RT_MB(4), RT_MB(8)}; -#define NUM_BLOCK_SIZES (sizeof(_block_sizes) / sizeof(_block_sizes[0])) -static rt_buffer_region _regions[NUM_BLOCK_SIZES]; - -RT_CVAR_SZ(rt_BufferManagerMemory, - "Total number of bytes allocated for the buffer manager. Default: 1GB", - RT_GB(1)); - -rt_result InitBufferManager(void) { - if ((rt_BufferManagerMemory.sz % NUM_BLOCK_SIZES) != 0) - rtLog("BUFFERMGR", - "Configured memory amount is not dividable by number of block " - "sizes: %u MB/%u", - rt_BufferManagerMemory.sz / (1024 * 1024), - NUM_BLOCK_SIZES); - - size_t mem_per_size = rt_BufferManagerMemory.sz / NUM_BLOCK_SIZES; - for (unsigned int i = 0; i < NUM_BLOCK_SIZES; ++i) { - if ((mem_per_size % _block_sizes[i]) != 0) - rtLog("BUFFERMGR", - "Memory per block size is not dividable by block size: %u " - "MB/%u KB", - mem_per_size / (1024 * 1024), - _block_sizes[i] / 1024); - - size_t block_count = mem_per_size / _block_sizes[i]; - _regions[i].block_count = block_count; - _regions[i].guard = rtCreateMutex(); - if (!_regions[i].guard) { - rtReportError("BUFFERMGR", "Failed to create guard mutex %u", i); - return RT_BUFFER_MGR_MUTEX_CREATION_FAILED; - } - _regions[i].memory = malloc(mem_per_size); - if (!_regions[i].memory) { - rtDestroyMutex(_regions[i].guard); - rtReportError("BUFFERMGR", "Failed to allocate memory.", i); - return RT_BUFFER_MGR_OUT_OF_MEMORY; - } - _regions[i].bitmap = calloc((block_count + 31) / 32, sizeof(uint32_t)); - if (!_regions[i].bitmap) { - rtDestroyMutex(_regions[i].guard); - free(_regions[i].memory); - rtReportError("BUFFERMGR", "Failed to allocate memory.", i); - return RT_BUFFER_MGR_OUT_OF_MEMORY; - } - _regions[i].refcounts = calloc(block_count, sizeof(uint16_t)); - if (!_regions[i].refcounts) { - rtDestroyMutex(_regions[i].guard); - free(_regions[i].memory); - free(_regions[i].bitmap); - rtReportError("BUFFERMGR", "Failed to allocate memory.", i); - return RT_BUFFER_MGR_OUT_OF_MEMORY; - } - } - 
return RT_SUCCESS; -} - -void ShutdownBufferManager(void) { - for (unsigned int i = 0; i < NUM_BLOCK_SIZES; ++i) { - rtDestroyMutex(_regions[i].guard); - free(_regions[i].memory); - free(_regions[i].bitmap); - free(_regions[i].refcounts); - } -} - -RT_DLLEXPORT void *rtAllocBuffer(size_t size) { - assert(IsLZCNTSupported()); - - // Determine the best block size to use - size_t required_blocks = (size + _block_sizes[0] - 1) / _block_sizes[0]; - size_t best_fit = 0; - for (size_t i = 1; i < NUM_BLOCK_SIZES; ++i) { - size_t block_count = (size + _block_sizes[i] - 1) / _block_sizes[i]; - if (block_count < required_blocks && size >= _block_sizes[i]) { - required_blocks = block_count; - best_fit = i; - } - } - - void *result = NULL; - - rt_buffer_region *region = &_regions[best_fit]; - rtLockMutex(region->guard); - size_t dword_count = (region->block_count + 31) / 32; - - if (required_blocks < 32) { - /* Fast path for allocations that potentially fit into one dword */ - uint32_t in_use_mask = (1ull << required_blocks) - 1; - size_t max_occupancy = 32 - required_blocks; - for (size_t i = 0; i < dword_count; ++i) { - size_t block_index = 0; - if (region->bitmap[i] != 0 && popcnt32(region->bitmap[i]) < max_occupancy) { - size_t free_high_blocks = lzcnt32(region->bitmap[i]); - if (free_high_blocks >= required_blocks) { - /* High blocks are free */ - size_t first_free = 32 - free_high_blocks; - region->bitmap[i] |= (in_use_mask << first_free); - block_index = i * 32 + first_free; - result = (char *)region->memory + block_index * _block_sizes[best_fit]; - } else if (tzcnt32(region->bitmap[i]) >= required_blocks) { - /* Low blocks are free */ - region->bitmap[i] |= in_use_mask; - block_index = i * 32; - result = (char *)region->memory + block_index * _block_sizes[best_fit]; - } else { - /* Check if we can find a large enough range of free blocks. - * Start after the first set bit. 
- */ - for (uint32_t j = tzcnt32(region->bitmap[i]) + 1; j < 32 - required_blocks; - ++j) { - if ((region->bitmap[i] & in_use_mask << j) == 0) { - region->bitmap[i] |= (in_use_mask << j); - block_index = i * 32 + j; - result = (char *)region->memory + block_index * _block_sizes[best_fit]; - break; - } - } - } - } else if (region->bitmap[i] == 0) { - /* All free */ - region->bitmap[i] = in_use_mask; - block_index = i * 32; - result = (char *)region->memory + block_index * _block_sizes[best_fit]; - } else if (i < dword_count - 1) { - /* Check if we can use high blocks from this dword and low blocks from the next one - */ - size_t high_blocks = lzcnt32(region->bitmap[i]); - size_t low_blocks = - (region->bitmap[i + 1] != 0) ? tzcnt32(region->bitmap[i + 1]) : 32; - - if (high_blocks + low_blocks >= required_blocks) { - size_t high_mask = (1u << high_blocks) - 1; - size_t first_free = 32 - high_blocks; - size_t low_mask = (1u << (required_blocks - high_blocks)) - 1; - - region->bitmap[i] |= (high_mask << first_free); - region->bitmap[i + 1] |= low_mask; - block_index = i * 32 + first_free; - result = (char *)region->memory + block_index * _block_sizes[best_fit]; - } - } - - if (result) { - for (size_t j = 0; j < required_blocks; ++j) - region->refcounts[block_index + j] = 1; - break; - } - } - } else { - for (size_t i = 0; i < dword_count; ++i) { - if (region->bitmap[i] == UINT32_MAX) { - continue; - } - /* Check if we can start the allocation here */ - - } - } - rtUnlockMutex(region->guard); - return result; -} - -RT_DLLEXPORT void rtReleaseBuffer(const void *begin, size_t size) { - if (!begin) - return; - uintptr_t begin_addr = (uintptr_t)begin; - for (unsigned int i = 0; i < NUM_BLOCK_SIZES; ++i) { - uintptr_t region_addr = (uintptr_t)_regions[i].memory; - size_t region_size = _block_sizes[i] * _regions[i].block_count; - if (begin_addr >= region_addr && begin_addr + size <= region_addr + region_size) { - - size_t block_count = (size + _block_sizes[i] - 1) / 
_block_sizes[i]; - size_t first_block = (begin_addr - region_addr) / _block_sizes[i]; - - rtLockMutex(_regions[i].guard); - for (size_t j = 0; j < block_count; ++j) { - size_t dword = (first_block + j) / 32; - size_t bit = (first_block + j) % 32; - - if (--_regions[i].refcounts[first_block + j] == 0) - _regions[i].bitmap[dword] &= ~(1u << bit); - } - rtUnlockMutex(_regions[i].guard); - return; - } - } - rtLog("BUFFERMGR", "Tried to release an invalid buffer"); -} - -RT_DLLEXPORT void rtIncreaseBufferRefCount(const void *begin, size_t size) { - uintptr_t begin_addr = (uintptr_t)begin; - for (unsigned int i = 0; i < NUM_BLOCK_SIZES; ++i) { - uintptr_t region_addr = (uintptr_t)_regions[i].memory; - size_t region_size = _block_sizes[i] * _regions[i].block_count; - if (begin_addr >= region_addr && begin_addr + size <= region_addr + region_size) { - - size_t block_count = (size + _block_sizes[i] - 1) / _block_sizes[i]; - size_t first_block = (begin_addr - region_addr) / _block_sizes[i]; - - rtLockMutex(_regions[i].guard); - for (size_t j = 0; j < block_count; ++j) { - ++_regions[i].refcounts[first_block + j]; - } - rtUnlockMutex(_regions[i].guard); - return; - } - } - rtLog("BUFFERMGR", "Tried to increase the refcount of an invalid buffer"); -} -#endif - #define BLOCK_SIZE 4096u -static uint32_t *_refcounts; static uint32_t *_bitmap; static char *_memory; static rt_mutex *_guard; @@ -298,14 +74,12 @@ extern rt_result InitBufferManager(void) { size_t dword_count = (block_count + 31) / 32; _block_count = block_count; - _memory = malloc(budget + dword_count * sizeof(uint32_t) + block_count * sizeof(uint32_t)); + _memory = malloc(budget + dword_count * sizeof(uint32_t)); if (!_memory) { return RT_OUT_OF_MEMORY; } _bitmap = (uint32_t*)(_memory + budget); memset(_bitmap, 0, sizeof(uint32_t) * dword_count); - _refcounts = _bitmap + dword_count; - memset(_refcounts, 0, sizeof(uint32_t) * block_count); return RT_SUCCESS; } @@ -381,9 +155,6 @@ RT_DLLEXPORT void 
*rtAllocBuffer(size_t size) { } } - for (size_t i = first_block; i < first_block + alloc_blocks; ++i) - _refcounts[i] = 1; - rtUnlockMutex(_guard); rtLog("BUFFERMGR", "Result ptr %llx", (uintptr_t)result); return result; @@ -395,22 +166,9 @@ RT_DLLEXPORT void rtReleaseBuffer(const void *begin, size_t size) { uintptr_t first_block = off / BLOCK_SIZE; rtLockMutex(_guard); for (size_t i = first_block; i < first_block + alloc_blocks; ++i) { - if (--_refcounts[i] == 0) { - size_t dword = i / 32; - size_t bit = i % 32; - _bitmap[dword] &= ~(1u << bit); - } + size_t dword = i / 32; + size_t bit = i % 32; + _bitmap[dword] &= ~(1u << bit); } rtUnlockMutex(_guard); } - -RT_DLLEXPORT void rtIncreaseBufferRefCount(const void *begin, size_t size) { - size_t alloc_blocks = (size + BLOCK_SIZE - 1) / BLOCK_SIZE; - uintptr_t off = (uintptr_t)begin - (uintptr_t)_memory; - uintptr_t first_block = off / BLOCK_SIZE; - rtLockMutex(_guard); - for (size_t i = first_block; i < first_block + alloc_blocks; ++i) { - ++_refcounts[i]; - } - rtUnlockMutex(_guard); -} \ No newline at end of file diff --git a/src/runtime/buffer_manager.h b/src/runtime/buffer_manager.h index 5a295b1..434217a 100644 --- a/src/runtime/buffer_manager.h +++ b/src/runtime/buffer_manager.h @@ -16,8 +16,6 @@ RT_DLLEXPORT void *rtAllocBuffer(size_t size); RT_DLLEXPORT void rtReleaseBuffer(const void *begin, size_t size); -RT_DLLEXPORT void rtIncreaseBufferRefCount(const void *begin, size_t size); - #ifdef __cplusplus } #endif diff --git a/src/runtime/ds.h b/src/runtime/ds.h new file mode 100644 index 0000000..1bab093 --- /dev/null +++ b/src/runtime/ds.h @@ -0,0 +1,59 @@ +#ifndef RT_DS_H +#define RT_DS_H + +/* Datastructure Library */ + +#include "runtime.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* A minheap. + * + * The memory pointed to by keys and values is not owned by the minheap + * and instead provided by the caller. 
*/ +typedef struct { + int *keys; + void *values; + size_t value_size; + size_t capacity; + int size; +} rt_minheap; + +/* Comparison function for rtMinheapUpdate. + * The function should return 0 if *a and *b are considered equal, + * and a different (non-zero) value if they are non-equal. + * + * Note that memcmp fits this requirement. + */ +typedef int rt_minheap_cmp_fn(const void *a, const void *b, size_t n); + +/* Takes the arrays and re-orders the values to create a minheap. */ +RT_DLLEXPORT rt_minheap +rtCreateMinheap(int *keys, void *values, size_t value_size, size_t capacity, int initial_size); + +/* Copies the value with the smallest key to min_value */ +RT_DLLEXPORT void rtMinheapPeek(const rt_minheap *minheap, void *min_value); + +/* Copies the value with the smallest key to min_value and removes it from the heap */ +RT_DLLEXPORT void rtMinheapPop(rt_minheap *minheap, void *min_value); + +/* Pushes a new value into the minheap */ +RT_DLLEXPORT void rtMinheapPush(rt_minheap *minheap, int key, const void *value); + +/* Changes the key of an existing value, or inserts it, if it's not found. + * + * Uses memcmp if cmp is NULL. 
*/ +RT_DLLEXPORT void +rtMinheapUpdate(rt_minheap *minheap, const void *value, int new_key, rt_minheap_cmp_fn *cmp); + +static RT_INLINE int rtMinheapIsEmpty(rt_minheap *minheap) { + return minheap->size == 0; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/runtime/ds_minheap.c b/src/runtime/ds_minheap.c new file mode 100644 index 0000000..b362fe5 --- /dev/null +++ b/src/runtime/ds_minheap.c @@ -0,0 +1,138 @@ +#include "ds.h" + +#include + +extern int memcmp(const void *ptr1, const void *ptr2, size_t num); +extern void *memcpy(void *destination, const void *source, size_t num); + +/* Utilities for index calculation */ + +static RT_INLINE int Parent(int i) { + return (i - 1) / 2; +} + +static RT_INLINE int Left(int i) { + return 2 * i + 1; +} + +static RT_INLINE int Right(int i) { + return 2 * i + 2; +} + +static void Swap(rt_minheap *minheap, int i, int j, void *tmpv) { + void *vi = (char *)minheap->values + (size_t)i * minheap->value_size; + void *vj = (char *)minheap->values + (size_t)j * minheap->value_size; + int tk = minheap->keys[i]; + memcpy(tmpv, vi, minheap->value_size); + minheap->keys[i] = minheap->keys[j]; + memcpy(vi, vj, minheap->value_size); + minheap->keys[j] = tk; + memcpy(vj, tmpv, minheap->value_size); +} + +/* Restores the heap property below start by sifting that element down past smaller children. */ +static void Heapify(rt_minheap *minheap, int start) { + int i = start; + int size = minheap->size; /* only live elements take part; slots beyond size are not in the heap */ + /* FIXME: Allocate on temp-arena */ + char tmpv[256]; + assert(minheap->value_size < sizeof(tmpv) && "tmpv should be allocated on a temporary arena."); + while (1) { + int min = i; + if (Left(i) < size && minheap->keys[Left(i)] < minheap->keys[min]) + min = Left(i); + if (Right(i) < size && minheap->keys[Right(i)] < minheap->keys[min]) + min = Right(i); + if (min == i) + break; + Swap(minheap, i, min, tmpv); + i = min; /* follow the element to its new slot and keep sifting */ + } +} + +static void Decrease(rt_minheap *minheap, int i, int newkey) { + minheap->keys[i] = newkey; + /* FIXME: Allocate on temp-arena */ + char tmpv[256]; + assert(minheap->value_size < sizeof(tmpv) && "tmpv
should be allocated on a temporary arena."); + while (i > 0 && minheap->keys[i] < minheap->keys[Parent(i)]) { + Swap(minheap, i, Parent(i), tmpv); + i = Parent(i); + } +} + +static void Remove(rt_minheap *minheap, int i) { + int last = minheap->size - 1; + if (last < 0) + return; + /* FIXME: Allocate on temp-arena */ + char tmpv[256]; + assert(minheap->value_size < sizeof(tmpv) && "tmpv should be allocated on a temporary arena."); + Swap(minheap, i, last, tmpv); + minheap->size = last; + if (i != last) { + if (i == 0 || minheap->keys[i] > minheap->keys[Parent(i)]) + Heapify(minheap, i); + else + Decrease(minheap, i, minheap->keys[i]); + } +} + +/* Takes the arrays and re-orders the values to create a minheap. */ +RT_DLLEXPORT rt_minheap +rtCreateMinheap(int *keys, void *values, size_t value_size, size_t capacity, int initial_size) { + rt_minheap minheap = { + .keys = keys, + .values = values, + .value_size = value_size, + .capacity = capacity, + .size = initial_size, + }; + /* Start at the last non-leaf element */ + for (int i = initial_size / 2 - 1; i >= 0; --i) + Heapify(&minheap, i); + return minheap; +} + +/* Copies the value with the smallest key to min_value */ +RT_DLLEXPORT void rtMinheapPeek(const rt_minheap *minheap, void *min_value) { + memcpy(min_value, minheap->values, minheap->value_size); +} + +/* Copies the value with the smallest key to min_value and removes it from the heap */ +RT_DLLEXPORT void rtMinheapPop(rt_minheap *minheap, void *min_value) { + rtMinheapPeek(minheap, min_value); + Remove(minheap, 0); +} + +/* Pushes a new value into the minheap */ +RT_DLLEXPORT void rtMinheapPush(rt_minheap *minheap, int key, const void *value) { + int at = minheap->size; + void *v = (char *)minheap->values + (size_t)at * minheap->value_size; + minheap->keys[at] = key; + memcpy(v, value, minheap->value_size); + ++minheap->size; + Decrease(minheap, at, key); +} + +/* Changes the key of an existing value, or inserts it, if it's not found. 
+ * + * Uses memcmp if cmp is NULL. */ +RT_DLLEXPORT void +rtMinheapUpdate(rt_minheap *minheap, const void *value, int new_key, rt_minheap_cmp_fn *cmp) { + if (!cmp) + cmp = memcmp; + for (int i = 0; i < minheap->size; ++i) { + void *v = (char *)minheap->values + (size_t)i * minheap->value_size; + if (cmp(v, value, minheap->value_size) == 0) { + if (minheap->keys[i] > new_key) { + Decrease(minheap, i, new_key); + } else if (minheap->keys[i] < new_key) { + Remove(minheap, i); + rtMinheapPush(minheap, new_key, value); + } + return; + } + } + rtMinheapPush(minheap, new_key, value); +} \ No newline at end of file diff --git a/src/runtime/dxc_shader_compiler.cpp b/src/runtime/dxc_shader_compiler.cpp index 3b49140..81d91d4 100644 --- a/src/runtime/dxc_shader_compiler.cpp +++ b/src/runtime/dxc_shader_compiler.cpp @@ -22,7 +22,7 @@ extern "C" rt_shader_bytecode CompileVulkanShader(rt_shader_stage stage, // Check if this is what we want. // For example: 6_2 is what allows the usage of 16 bit types LPCWSTR target_profile = nullptr; - LPWSTR entry = nullptr; + LPCWSTR entry = nullptr; switch (stage) { case RT_SHADER_STAGE_VERTEX: target_profile = L"vs_6_1"; @@ -41,7 +41,7 @@ extern "C" rt_shader_bytecode CompileVulkanShader(rt_shader_stage stage, return bc; } - LPWSTR optimization_arg = nullptr; + LPCWSTR optimization_arg = nullptr; switch (optimization) { case RT_SHADER_OPTIMIZATION_NONE: optimization_arg = L"-Od"; @@ -81,7 +81,7 @@ extern "C" rt_shader_bytecode CompileVulkanShader(rt_shader_stage stage, utils->Release(); compiler->Release(); library->Release(); - rtReportError("AC", "Failed to init the DXC ínclude handler."); + rtReportError("AC", "Failed to init the DXC include handler."); return bc; } @@ -118,8 +118,9 @@ extern "C" rt_shader_bytecode CompileVulkanShader(rt_shader_stage stage, // Error occured IDxcBlobEncoding *error_blob; hr = result->GetErrorBuffer(&error_blob); - if (SUCCEEDED(hr) && error_blob) { - rtLog("AC", "Shader %s compilation failed: %s", (const 
char *)error_blob->GetBufferPointer()); + if (SUCCEEDED(hr) && error_blob && error_blob->GetBufferSize() > 0) { + const char *msg = (const char *)error_blob->GetBufferPointer(); + rtLog("AC", "Shader %s compilation failed: %s", file_path, msg); error_blob->Release(); } else { rtLog("AC", "Shader %s compilation failed. No error information available!"); diff --git a/src/runtime/error_report.c b/src/runtime/error_report.c index 85e17eb..4e254ff 100644 --- a/src/runtime/error_report.c +++ b/src/runtime/error_report.c @@ -57,12 +57,12 @@ static void LogOut(const char *text) { RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) { char buf[256]; - int at = snprintf(buf, RT_ARRAY_COUNT(buf) - 1, "[%s] ", subsystem); + int at = rtSPrint(buf, RT_ARRAY_COUNT(buf) - 1, "[%s] ", subsystem); va_list ap; va_start(ap, fmt); - at += vsnprintf(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, fmt, ap); + at += rtVSPrint(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, fmt, ap); va_end(ap); - at += snprintf(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, "\n"); + at += rtSPrint(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, "\n"); LogOut(buf); if (DisplayErrorBox(buf)) { @@ -73,12 +73,12 @@ RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...) { RT_DLLEXPORT void rtLog(const char *subsystem, const char *fmt, ...) 
{ char buf[256]; - int at = snprintf(buf, RT_ARRAY_COUNT(buf) - 1, "[%s] ", subsystem); + int at = rtSPrint(buf, RT_ARRAY_COUNT(buf), "[%s] ", subsystem); va_list ap; va_start(ap, fmt); - at += vsnprintf(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, fmt, ap); + at += rtVSPrint(&buf[at], RT_ARRAY_COUNT(buf) - at, fmt, ap); va_end(ap); - at += snprintf(&buf[at], RT_ARRAY_COUNT(buf) - at - 1, "\n"); + at += rtSPrint(&buf[at], RT_ARRAY_COUNT(buf) - at, "\n"); LogOut(buf); } \ No newline at end of file diff --git a/src/runtime/fsutils.c b/src/runtime/fsutils.c index 2488122..9007893 100644 --- a/src/runtime/fsutils.c +++ b/src/runtime/fsutils.c @@ -84,6 +84,12 @@ RT_DLLEXPORT void rtCloseDirectory(rt_scandir_handle *dir) { dir->handle = NULL; } +RT_DLLEXPORT bool rtCreateDirectory(const char *path) { + WCHAR wpath[MAX_PATH]; + MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, path, -1, wpath, MAX_PATH); + return CreateDirectoryW(wpath, NULL); +} + RT_DLLEXPORT size_t rtGetFileSize(const char *path) { WCHAR wpath[MAX_PATH]; MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, path, -1, wpath, MAX_PATH); diff --git a/src/runtime/fsutils.h b/src/runtime/fsutils.h index b6ef011..1dfd9ba 100644 --- a/src/runtime/fsutils.h +++ b/src/runtime/fsutils.h @@ -29,6 +29,8 @@ RT_DLLEXPORT rt_dirent rtNextDirectoryEntry(rt_scandir_handle *dir); RT_DLLEXPORT void rtCloseDirectory(rt_scandir_handle *dir); +RT_DLLEXPORT bool rtCreateDirectory(const char *path); + RT_DLLEXPORT size_t rtGetFileSize(const char *path); RT_DLLEXPORT uint64_t rtGetFileModificationTimestamp(const char *path); diff --git a/src/runtime/init.c b/src/runtime/init.c index 2dd7b89..efce06f 100644 --- a/src/runtime/init.c +++ b/src/runtime/init.c @@ -11,6 +11,9 @@ extern rt_cvar rt_WindowHeight; extern rt_cvar rt_BufferMemoryBudget; extern rt_cvar rt_FileTabCapacity; extern rt_cvar rt_MaxConcurrentAsyncIO; +extern rt_cvar rt_ResourceDirectory; +extern rt_cvar rt_ResourceCacheSize; +extern rt_cvar rt_ResourceNamespaceSize; #ifdef 
RT_BUILD_ASSET_COMPILER extern rt_cvar rt_AssetDirectory; @@ -24,6 +27,9 @@ void RegisterRuntimeCVars(void) { rtRegisterCVAR(&rt_BufferMemoryBudget); rtRegisterCVAR(&rt_FileTabCapacity); rtRegisterCVAR(&rt_MaxConcurrentAsyncIO); + rtRegisterCVAR(&rt_ResourceDirectory); + rtRegisterCVAR(&rt_ResourceCacheSize); + rtRegisterCVAR(&rt_ResourceNamespaceSize); #ifdef RT_BUILD_ASSET_COMPILER rtRegisterCVAR(&rt_AssetDirectory); #endif @@ -37,6 +43,8 @@ extern rt_result InitFileTab(void); extern void ShutdownFileTab(void); extern rt_result InitAIO(void); extern void ShutdownAIO(void); +extern rt_result InitResourceManager(void); +extern void ShutdownResourceManager(void); #ifdef RT_BUILD_ASSET_COMPILER extern rt_result InitAssetCompiler(void); @@ -63,6 +71,11 @@ RT_DLLEXPORT rt_result rtInitRuntime(void) { return res; } + if ((res = InitResourceManager()) != RT_SUCCESS) { + rtReportError("RESMGR", "Init failed."); + return res; + } + #ifdef RT_BUILD_ASSET_COMPILER if ((res = InitAssetCompiler()) != RT_SUCCESS) { rtReportError("AC", "Init failed."); @@ -77,6 +90,7 @@ RT_DLLEXPORT void rtShutdownRuntime(void) { #ifdef RT_BUILD_ASSET_COMPILER ShutdownAssetCompiler(); #endif + ShutdownResourceManager(); ShutdownAIO(); ShutdownFileTab(); ShutdownBufferManager(); diff --git a/src/runtime/pipeline_processor.c b/src/runtime/pipeline_processor.c index da16bd2..f3356b3 100644 --- a/src/runtime/pipeline_processor.c +++ b/src/runtime/pipeline_processor.c @@ -1,14 +1,14 @@ -#include "runtime.h" -#include "mem_arena.h" -#include "description_parser.h" -#include "buffer_manager.h" #include "asset_compiler.h" -#include "shader_compiler.h" -#include "gfx.h" +#include "buffer_manager.h" #include "config.h" +#include "description_parser.h" +#include "gfx.h" +#include "mem_arena.h" +#include "runtime.h" +#include "shader_compiler.h" -#include #include +#include #include typedef struct { @@ -16,9 +16,13 @@ typedef struct { rt_attribute_binding *storage_bindings; rt_attribute_binding 
*texture_bindings; - rt_shader_bytecode vertex_shader; - rt_shader_bytecode fragment_shader; - rt_shader_bytecode compute_shader; + rt_resource shaders[3]; + char *shader_names[3]; + unsigned int shader_count; + + unsigned int vertex_shader; + unsigned int fragment_shader; + unsigned int compute_shader; /* TODO Fixed function settings */ @@ -144,6 +148,58 @@ static bool ParseBindings(rt_parse_state *state, } } +static char *GenerateShaderName(rt_shader_type type, + rt_shader_stage stage, + rt_shader_optimization_level optimization, + const char *file_name, + rt_arena *arena) { + size_t name_len = strlen(file_name) + 5 /* type */ + + 5 /* stage */ + + 3 /* optimization */ + + 1; /* '\0' */ + char *res_name = rtArenaPush(arena, name_len); + if (!res_name) + return NULL; + const char *type_str = NULL; + switch (type) { + case RT_SHADER_TYPE_VULKAN: + type_str = ":vk"; + break; + default: + return NULL; + } + const char *stage_str = NULL; + switch (stage) { + case RT_SHADER_STAGE_VERTEX: + stage_str = ":vert"; + break; + case RT_SHADER_STAGE_FRAGMENT: + stage_str = ":frag"; + break; + case RT_SHADER_STAGE_COMPUTE: + stage_str = ":comp"; + break; + default: + return NULL; + } + const char *optim_str = NULL; + switch (optimization) { + case RT_SHADER_OPTIMIZATION_NONE: + optim_str = ":O0"; + break; + case RT_SHADER_OPTIMIZATION_SIZE: + optim_str = ":Os"; + break; + case RT_SHADER_OPTIMIZATION_SPEED: + optim_str = ":Ox"; + break; + default: + return NULL; + } + rtSPrint(res_name, name_len, "%s%s%s%s", file_name, type_str, stage_str, optim_str); + return res_name; +} + static rt_result ParseShader(rt_parse_state *state, unsigned int root_list, const char *name, @@ -151,7 +207,8 @@ static rt_result ParseShader(rt_parse_state *state, rt_shader_type type, rt_shader_stage stage, rt_shader_optimization_level optimization, - rt_shader_bytecode *p_shader_bytecode, + rt_resource *p_resource, + char **p_resource_name, rt_arena *arena) { const rt_parsed_stmt *stmt = 
rtFindStatement(state, root_list, name); if (stmt) { @@ -195,10 +252,32 @@ static rt_result ParseShader(rt_parse_state *state, if (in_file_type == type) { if (shader->form == RT_STMT_FORM_BLOCK) { /* Inline code */ - *p_shader_bytecode = + rt_shader_bytecode bytecode = CompileShader(type, stage, optimization, shader->block, file_path, arena); - if (!p_shader_bytecode->bytes) + if (!bytecode.bytes) return RT_ASSET_PROCESSING_FAILED; + + *p_resource_name = + GenerateShaderName(type, stage, optimization, file_path, arena); + if (!*p_resource_name) + return RT_ASSET_PROCESSING_FAILED; + + rt_resource resource; + resource.type = RT_RESOURCE_SHADER; + resource.dependency_count = 0; + resource.subresource_count = 0; + resource.data = rtArenaPush(arena, sizeof(rt_shader_info) + bytecode.len); + if (!resource.data) + return RT_ASSET_PROCESSING_FAILED; + rt_shader_info *shader_info = resource.data; + uint8_t *shader_bytecode = (uint8_t *)(shader_info + 1); + shader_info->stage = stage; + shader_info->type = type; + shader_info->bytecode_length = bytecode.len; + rtSetRelptr(&shader_info->bytecode, shader_bytecode); + memcpy(shader_bytecode, bytecode.bytes, bytecode.len); + + memcpy(p_resource, &resource, sizeof(resource)); break; } else if (shader->form != RT_STMT_FORM_VALUE) { /* A filename */ @@ -262,8 +341,9 @@ static rt_result ParsePipelineFile(rt_file_id fid, } /* We allow the pipeline file to overwrite the optimization level */ - rt_shader_optimization_level optimization = ParseOptimizationLevel(&state, root_list, file_path); - + rt_shader_optimization_level optimization = + ParseOptimizationLevel(&state, root_list, file_path); + rt_shader_type type = RT_SHADER_TYPE_INVALID; if (strcmp(rt_Renderer.s, "vk") == 0) type = RT_SHADER_TYPE_VULKAN; @@ -275,42 +355,65 @@ static rt_result ParsePipelineFile(rt_file_id fid, } /* Process shader stages */ - if (ParseShader(&state, - root_list, - "vertex", - file_path, - type, - RT_SHADER_STAGE_VERTEX, - optimization, - 
&pipeline->vertex_shader, - arena) == RT_ASSET_PROCESSING_FAILED) { - result = RT_ASSET_PROCESSING_FAILED; + result = ParseShader(&state, + root_list, + "vertex", + file_path, + type, + RT_SHADER_STAGE_VERTEX, + optimization, + &pipeline->shaders[pipeline->shader_count], + &pipeline->shader_names[pipeline->shader_count], + arena); + if (result == RT_SUCCESS) { + pipeline->vertex_shader = pipeline->shader_count; + ++pipeline->shader_count; + } else if (result == RT_SHADER_NOT_PRESENT) { + pipeline->vertex_shader = UINT_MAX; + } else { goto out; } - if (ParseShader(&state, - root_list, - "fragment", - file_path, - type, - RT_SHADER_STAGE_FRAGMENT, - optimization, - &pipeline->fragment_shader, - arena) == RT_ASSET_PROCESSING_FAILED) { - result = RT_ASSET_PROCESSING_FAILED; + result = RT_SUCCESS; + + result = ParseShader(&state, + root_list, + "fragment", + file_path, + type, + RT_SHADER_STAGE_FRAGMENT, + optimization, + &pipeline->shaders[pipeline->shader_count], + &pipeline->shader_names[pipeline->shader_count], + arena); + if (result == RT_SUCCESS) { + pipeline->fragment_shader = pipeline->shader_count; + ++pipeline->shader_count; + } else if (result == RT_SHADER_NOT_PRESENT) { + pipeline->fragment_shader = UINT_MAX; + } else { goto out; } - if (ParseShader(&state, - root_list, - "compute", - file_path, - type, - RT_SHADER_STAGE_COMPUTE, - optimization, - &pipeline->compute_shader, - arena) == RT_ASSET_PROCESSING_FAILED) { - result = RT_ASSET_PROCESSING_FAILED; + result = RT_SUCCESS; + + result = ParseShader(&state, + root_list, + "compute", + file_path, + type, + RT_SHADER_STAGE_COMPUTE, + optimization, + &pipeline->shaders[pipeline->shader_count], + &pipeline->shader_names[pipeline->shader_count], + arena); + if (result == RT_SUCCESS) { + pipeline->compute_shader = pipeline->shader_count; + ++pipeline->shader_count; + } else if (result == RT_SHADER_NOT_PRESENT) { + pipeline->compute_shader = UINT_MAX; + } else { goto out; } + result = RT_SUCCESS; /* Process 
bindings */ pipeline->texture_bindings = NULL; @@ -354,18 +457,74 @@ out: return result; } -rt_result PipelineProcessor(rt_file_id file, rt_arena *arena) { +RT_ASSET_PROCESSOR_FN(PipelineProcessor) { rt_loaded_asset asset = LoadAsset(file); if (!asset.buffer) return RT_UNKNOWN_ERROR; rt_parsed_pipeline_data pipeline; + memset(&pipeline, 0, sizeof(pipeline)); rt_result result = ParsePipelineFile(file, asset.buffer, asset.size, &pipeline, arena); if (result != RT_SUCCESS) goto out; - + rt_resource_id shader_resources[3] = {0}; + result = rtCreateResources(pipeline.shader_count, + pipeline.shader_names, + pipeline.shaders, + shader_resources); + if (result != RT_SUCCESS) + goto out; + rt_resource pipeline_resource = {0}; + pipeline_resource.type = RT_RESOURCE_PIPELINE; + pipeline_resource.dependency_count = pipeline.shader_count; + memcpy(pipeline_resource.dependencies, shader_resources, sizeof(shader_resources)); + pipeline_resource.subresource_count = 0; + + size_t data_size = + sizeof(rt_pipeline_info) + sizeof(rt_attribute_binding) * (pipeline.texture_binding_count + + pipeline.uniform_binding_count + + pipeline.storage_binding_count); + pipeline_resource.data = rtArenaPush(arena, data_size); + if (!pipeline_resource.data) { + result = RT_OUT_OF_MEMORY; + goto out; + } + rt_pipeline_info *info = pipeline_resource.data; + memset(info, 0, sizeof(*info)); + info->vertex_shader = (pipeline.vertex_shader != UINT_MAX) + ? shader_resources[pipeline.vertex_shader] + : RT_INVALID_RESOURCE_ID; + info->fragment_shader = (pipeline.fragment_shader != UINT_MAX) + ? shader_resources[pipeline.fragment_shader] + : RT_INVALID_RESOURCE_ID; + info->compute_shader = (pipeline.compute_shader != UINT_MAX) + ? 
shader_resources[pipeline.compute_shader] + : RT_INVALID_RESOURCE_ID; + rt_attribute_binding *uniform_bindings = (rt_attribute_binding *)(info + 1); /* bindings are packed behind info: uniform, texture, storage */ + if (pipeline.uniform_binding_count > 0) { + memcpy(uniform_bindings, pipeline.uniform_bindings, sizeof(rt_attribute_binding) * pipeline.uniform_binding_count); /* memcpy counts bytes, not elements */ + rtSetRelptr(&info->uniform_bindings, uniform_bindings); + } + rt_attribute_binding *texture_bindings = (uniform_bindings + pipeline.uniform_binding_count); + if (pipeline.texture_binding_count > 0) { + memcpy(texture_bindings, pipeline.texture_bindings, sizeof(rt_attribute_binding) * pipeline.texture_binding_count); + rtSetRelptr(&info->texture_bindings, texture_bindings); + } + rt_attribute_binding *storage_bindings = (texture_bindings + pipeline.texture_binding_count); + if (pipeline.storage_binding_count > 0) { + memcpy(storage_bindings, pipeline.storage_bindings, sizeof(rt_attribute_binding) * pipeline.storage_binding_count); + rtSetRelptr(&info->storage_bindings, storage_bindings); + } + rt_resource_id pipeline_id; + const char *name = rtGetFilePath(file); + result = rtCreateResources(1, &name, &pipeline_resource, &pipeline_id); + if (result == RT_SUCCESS) { + new_resources[0] = pipeline_id; + memcpy(&new_resources[1], shader_resources, sizeof(shader_resources)); + *new_resource_count = 1 + pipeline.shader_count; + } out: rtReleaseBuffer(asset.buffer, asset.size); return result; diff --git a/src/runtime/renderer_api.h b/src/runtime/renderer_api.h index 54ab309..8be231e 100644 --- a/src/runtime/renderer_api.h +++ b/src/runtime/renderer_api.h @@ -7,7 +7,7 @@ #include "gfx.h" #include "runtime.h" -#include "assets.h" +#include "resources.h" #ifdef __cplusplus extern "C" { @@ -31,10 +31,9 @@ struct rt_renderer_init_info_s { }; typedef struct { - /* rt_uid vertex_shader; - rt_uid fragment_shader; - rt_uid compute_shader; - */ + rt_resource_id vertex_shader; + rt_resource_id fragment_shader; + rt_resource_id compute_shader; rt_relptr texture_bindings; rt_relptr uniform_bindings; @@ -45,6 +44,26 @@ typedef struct { uint16_t storage_binding_count; }
rt_pipeline_info; +typedef enum { + RT_SHADER_TYPE_INVALID, + RT_SHADER_TYPE_VULKAN, + + RT_SHADER_TYPE_COUNT, +} rt_shader_type; + +typedef enum { + RT_SHADER_STAGE_VERTEX, + RT_SHADER_STAGE_FRAGMENT, + RT_SHADER_STAGE_COMPUTE, +} rt_shader_stage; + +typedef struct { + rt_shader_type type; + rt_shader_stage stage; + rt_relptr bytecode; + size_t bytecode_length; +} rt_shader_info; + typedef void rt_register_renderer_cvars_fn(void); typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info); typedef void rt_shutdown_renderer_fn(void); diff --git a/src/runtime/resource_manager.c b/src/runtime/resource_manager.c new file mode 100644 index 0000000..ae751c4 --- /dev/null +++ b/src/runtime/resource_manager.c @@ -0,0 +1,444 @@ +#include "aio.h" +#include "buffer_manager.h" +#include "config.h" +#include "ds.h" +#include "file_tab.h" +#include "fsutils.h" +#include "hashing.h" +#include "renderer_api.h" +#include "resources.h" +#include "threading.h" + +#include +#include +#include + +RT_CVAR_S(rt_ResourceDirectory, "The directory used for storing resources. Default: res", "res"); +RT_CVAR_I(rt_ResourceCacheSize, + "The maximum amount of memory used for caching resources. Default: 512MB", + RT_MB(512)); +RT_CVAR_I(rt_MaxCachedResources, + "The maximum number of simultaneously cached resources. Default: 1024", + 1024); +RT_CVAR_I(rt_ResourceNamespaceSize, + "The maximum number of resources that can exist. 
Default: 1.048.576", + 1048576); + +#define RT_TOMBSTONE_ID 1 + +typedef struct { + void *buffer; + size_t size; + int next_free; + int usage_counter; +} rt_cached_resource; + +typedef struct { + unsigned int index; + rt_resource_id id; +} rt_cached_resource_ref; + +typedef struct { + void *mem; + + rt_minheap reclaim_heap; + + /* Used to lookup cached resources by id */ + rt_resource_id *resource_ids; + unsigned int *resource_indices; + + rt_cached_resource *resources; + unsigned int first_free; + + size_t current_size; + + rt_rwlock lock; +} rt_resource_cache; + +typedef struct { + rt_file_id file; + size_t offset; + size_t size; +} rt_resource_ref; + +typedef struct { + rt_resource_id *ids; + rt_resource_ref *refs; + rt_rwlock lock; +} rt_resource_namespace; + +/* ~~~ Utilities ~~~ */ + +static size_t GetResourceDataSize(const rt_resource *resource) { + switch (resource->type) { + case RT_RESOURCE_PIPELINE: + return sizeof(rt_pipeline_info); + case RT_RESOURCE_SHADER: { + /* Sizeof metadata + bytecode */ + const rt_shader_info *info = resource->data; + return sizeof(rt_shader_info) + (info) ? 
info->bytecode_length : 0; + } break; + default: + rtLog("RESMGR", "Tried to get size of an invalid resource type %u", resource->type); + } + return 0; +} + +static void CopyResourceData(const rt_resource *resource, void *dest) { + switch (resource->type) { + case RT_RESOURCE_PIPELINE: + memcpy(dest, resource->data, sizeof(rt_pipeline_info)); + break; + case RT_RESOURCE_SHADER: { + /* Sizeof metadata + bytecode */ + const rt_shader_info *info = resource->data; + rt_shader_info *dest_info = dest; + memcpy(dest_info, info, sizeof(*info)); + memcpy(dest_info + 1, rtResolveConstRelptr(&info->bytecode), info->bytecode_length); + rtSetRelptr(&dest_info->bytecode, (void *)(dest_info + 1)); + } break; + default: + rtLog("RESMGR", "Tried to get copy a resource of invalid type %u", resource->type); + } +} + +/* ~~~ Cache ~~~ */ + +static rt_resource_cache _cache; + +static rt_result InitResourceCache(void) { + int count = rt_MaxCachedResources.i; + if (count == 0) { + rtReportError("RESMGR", "rt_MaxCachedResources must be greater than 0."); + return RT_INVALID_VALUE; + } + + size_t required_mem = (size_t)count * (sizeof(rt_cached_resource_ref) + sizeof(int) + + sizeof(rt_cached_resource)) + + 2 * (size_t)count * (sizeof(rt_resource_id) + sizeof(unsigned int)); + + void *mem = malloc(required_mem); + if (!mem) + return RT_OUT_OF_MEMORY; + rt_create_rwlock_result lock_create = rtCreateRWLock(); + if (!lock_create.ok) { + free(mem); + return RT_UNKNOWN_ERROR; + } + memset(mem, 0, required_mem); + _cache.mem = mem; + + int *reclaim_keys = mem; + rt_cached_resource_ref *reclaim_refs = (rt_cached_resource_ref *)reclaim_keys + count; + _cache.reclaim_heap = rtCreateMinheap(reclaim_keys, + reclaim_refs, + sizeof(rt_cached_resource_ref), + (size_t)count, + 0); + + _cache.current_size = 0; + _cache.resources = (rt_cached_resource *)(reclaim_keys + count); + _cache.lock = lock_create.lock; + + for (int i = 0; i < count; ++i) { + _cache.resources[i].next_free = (i < count - 1) ? 
i + 1 : UINT_MAX; + } + _cache.first_free = 0; + + _cache.resource_ids = (rt_resource_id *)(_cache.resources + count); + _cache.resource_indices = (unsigned int *)(_cache.resource_ids + 2 * count); + + return RT_SUCCESS; +} + +static void ShutdownResourceCache(void) { + free(_cache.mem); + rtDestroyRWLock(&_cache.lock); + memset(&_cache, 0, sizeof(_cache)); +} + +static bool FreeCacheSpace(size_t space) { + size_t total_freed = 0; + while (total_freed < space && !rtMinheapIsEmpty(&_cache.reclaim_heap)) { + rt_cached_resource_ref ref; + rtMinheapPop(&_cache.reclaim_heap, &ref); + + rt_cached_resource *res = &_cache.resources[ref.index]; + rtReleaseBuffer(res->buffer, res->size); + total_freed += res->size; + + res->next_free = _cache.first_free; + _cache.first_free = ref.index; + res->usage_counter = 0; + res->buffer = NULL; + res->size = 0; + + /* Remove from lookup table */ + size_t ht_size = (size_t)rt_MaxCachedResources.i * 2; + for (size_t off = 0; off < ht_size; ++off) { + size_t slot = (ref.id + off) % ht_size; + if (_cache.resource_ids[slot] == ref.id) { + _cache.resource_ids[slot] = RT_TOMBSTONE_ID; + break; + } else if (_cache.resource_ids[slot] == RT_INVALID_RESOURCE_ID) { + break; + } + } + } + return total_freed >= space; +} + +static unsigned int FindCachedResource(rt_resource_id id) { + size_t ht_size = (size_t)rt_MaxCachedResources.i * 2; + for (size_t off = 0; off < ht_size; ++off) { + size_t slot = (id + off) % ht_size; + if (_cache.resource_ids[slot] == id) + return _cache.resource_indices[slot]; + else if (_cache.resource_ids[slot] == RT_INVALID_RESOURCE_ID) + return UINT_MAX; + } + return UINT_MAX; +} + +static rt_resource *CacheResource(rt_resource_id id, const rt_resource *res) { + rt_resource *cached = NULL; + rtLockWrite(&_cache.lock); + unsigned int index = FindCachedResource(id); + if (index != UINT_MAX) { + rt_cached_resource_ref ref = {.id = id, .index = index}; + rt_cached_resource *cache_entry = &_cache.resources[index]; + 
++cache_entry->usage_counter; + rtMinheapUpdate(&_cache.reclaim_heap, &ref, cache_entry->usage_counter, NULL); + cached = cache_entry->buffer; + } else { + /* Insert into cache */ + size_t total_size = sizeof(rt_resource) + GetResourceDataSize(res); + if (_cache.current_size + total_size >= (size_t)rt_ResourceCacheSize.i) { + if (!FreeCacheSpace(total_size)) { + rtLog("RESMGR", + "Unable to reclaim %zu kB from the resource cache.", + total_size / 1024); + rtUnlockWrite(&_cache.lock); + return NULL; + } + RT_ASSERT(_cache.first_free != UINT_MAX, + "There must be a free cache entry after space was freed."); + } + + void *buffer = rtAllocBuffer(total_size); + if (!buffer) { + rtLog("RESMG", "Unable to allocate %zu kB for the new resource.", total_size / 1024); + rtUnlockWrite(&_cache.lock); + return NULL; + } + memcpy(buffer, res, sizeof(rt_resource)); + cached = buffer; + cached->data = (void *)(cached + 1); + CopyResourceData(res, cached->data); + + index = _cache.first_free; + _cache.first_free = _cache.resources[index].next_free; + _cache.resources[index].buffer = buffer; + _cache.resources[index].usage_counter = 1; + _cache.resources[index].size = total_size; + _cache.resources[index].next_free = UINT_MAX; + + rt_cached_resource_ref reclaim_ref = { + .id = id, + .index = index, + }; + rtMinheapPush(&_cache.reclaim_heap, 1, &reclaim_ref); + + /* Insert into lookup table */ + bool inserted = false; + size_t ht_size = (size_t)rt_MaxCachedResources.i * 2; + for (size_t off = 0; off < ht_size; ++off) { + size_t slot = (id + off) % ht_size; + if (_cache.resource_ids[slot] == RT_INVALID_RESOURCE_ID || + _cache.resource_ids[slot] == RT_TOMBSTONE_ID || _cache.resource_ids[slot] == id) { + _cache.resource_indices[slot] = index; + inserted = true; + break; + } + } + if (!inserted) { + rtReportError("RESMGR", + "Failed to insert created resource into the resource lookup table."); + } + } + rtUnlockWrite(&_cache.lock); + return cached; +} + +/* ~~~ Resource Namespace ~~~ */ + 
+static rt_resource_namespace _namespace; + +static rt_result InitResourceNamespace(void) { + size_t size = (size_t)rt_ResourceNamespaceSize.i; + if (size == 0) { + rtReportError("RESMGR", "rt_ResourceNamespaceSize must be greater than 0."); + return RT_INVALID_VALUE; + } + + void *mem = calloc(size, sizeof(rt_resource_id) + sizeof(rt_resource_ref)); + if (!mem) + return RT_OUT_OF_MEMORY; + rt_create_rwlock_result lock_create = rtCreateRWLock(); + if (!lock_create.ok) { + free(mem); + return RT_UNKNOWN_ERROR; + } + _namespace.lock = lock_create.lock; + _namespace.ids = mem; + _namespace.refs = (rt_resource_ref *)(_namespace.ids + size); + return RT_SUCCESS; +} + +static void ShutdownNamespace(void) { + rtDestroyRWLock(&_namespace.lock); + free(_namespace.ids); + memset(&_namespace, 0, sizeof(_namespace)); +} + +#if 0 +static rt_resource_ref *GetResourceRefPtr(rt_resource_id id) { + rt_resource_ref *ref = NULL; + rtLockRead(&_namespace.lock); + size_t ns_size = (size_t)rt_ResourceNamespaceSize.i; + for (size_t j = 0; j < ns_size; ++j) { + size_t at = (id + j) % ns_size; + if (_namespace.ids[at] == RT_INVALID_RESOURCE_ID) { + break; + } else if (_namespace.ids[at] == id) { + ref = &_namespace.refs[at]; + break; + } + } + rtUnlockRead(&_namespace.lock); + return ref; +} +#endif + +/* Fills the passed write struct with the necessary information to save the resource to a file */ +static void +PrepareResourceFlushToFile(rt_resource_id id, const rt_resource *resource, rt_file_write *write) { + /* A file write needs one contiguous buffer */ + RT_ASSERT(((uintptr_t)resource->data == (uintptr_t)resource + sizeof(*resource)), + "The resource must reside in the cache, to ensure the correct memory layout"); + + char file_path[260]; + rtSPrint(file_path, 260, "%s/%llx.bin", rt_ResourceDirectory.s, id); + + write->file = rtAddFile(file_path); + write->buffer = resource; + write->offset = 0; + write->num_bytes = sizeof(rt_resource) + GetResourceDataSize(resource); +} + +/* ~~~ 
Public API ~~~ */ + +rt_result InitResourceManager(void) { + if (!rtCreateDirectory(rt_ResourceDirectory.s)) + rtLog("RESMGR", "CreateDirectory(%s) failed.", rt_ResourceDirectory.s); + + rt_result res; + if ((res = InitResourceCache()) != RT_SUCCESS) + return res; + if ((res = InitResourceNamespace()) != RT_SUCCESS) { + ShutdownResourceCache(); + return res; + } + return RT_SUCCESS; +} + +void ShutdownResourceManager(void) { + ShutdownResourceCache(); + ShutdownNamespace(); +} + +RT_DLLEXPORT rt_result rtCreateResources(uint32_t count, + const char **names, + const rt_resource *resources, + rt_resource_id *ids) { + rt_result result = RT_SUCCESS; + size_t ns_size = (size_t)rt_ResourceNamespaceSize.i; + + rt_write_batch writes = {.num_writes = 0}; + rt_aio_handle write_handles[RT_WRITE_BATCH_MAX_SIZE]; + uint32_t outstanding_writes = 0; + + rtLockWrite(&_namespace.lock); + for (uint32_t i = 0; i < count; ++i) { + size_t name_len = strlen(names[i]); + rt_resource_id id = (rt_resource_id)rtHashBytes(names[i], name_len); + if (id == RT_INVALID_RESOURCE_ID || id == RT_TOMBSTONE_ID) + id = ~id; + + bool inserted = false; + for (size_t j = 0; j < ns_size; ++j) { + size_t at = (id + j) % ns_size; + if (_namespace.ids[at] == RT_INVALID_RESOURCE_ID) { + inserted = true; + + ids[i] = id; + + const rt_resource *cached_resource = CacheResource(id, &resources[i]); + + PrepareResourceFlushToFile(id, cached_resource, &writes.writes[writes.num_writes]); + _namespace.ids[at] = id; + _namespace.refs[at].offset = writes.writes[writes.num_writes].offset; + _namespace.refs[at].size = writes.writes[writes.num_writes].num_bytes; + _namespace.refs[at].file = writes.writes[writes.num_writes].file; + ++writes.num_writes; + break; + } else if (_namespace.ids[at] == id) { + rtReportError("RESMGR", + "Resource ID collision occured with resource %s.\nID: %llx", + names[i], + id); + result = RT_INVALID_FILE_ID; + goto out; + } + } + if (!inserted) { + result = RT_OUT_OF_MEMORY; + goto out; + } + + 
if (writes.num_writes == RT_WRITE_BATCH_MAX_SIZE || + (i == count - 1 && writes.num_writes > 0)) { + if (outstanding_writes > 0) { + /* Wait until the previous batch is finished */ + for (uint32_t k = 0; k < outstanding_writes; ++k) { + if (rtWaitForAIOCompletion(write_handles[k]) != RT_AIO_STATE_FINISHED) { + rtReportError("RESMGR", "Resource write failed."); + result = RT_UNKNOWN_ERROR; + goto out; + } + rtReleaseAIO(write_handles[k]); + } + } + outstanding_writes = writes.num_writes; + if (rtSubmitWriteBatch(&writes, write_handles) != RT_SUCCESS) { + rtReportError("RESMGR", "Failed to submit resource writes."); + result = RT_UNKNOWN_ERROR; + goto out; + } + } + } + + if (outstanding_writes > 0) { + /* Wait until the last batch is finished */ + for (uint32_t i = 0; i < outstanding_writes; ++i) { + if (rtWaitForAIOCompletion(write_handles[i]) != RT_AIO_STATE_FINISHED) { + rtReportError("RESMGR", "Resource write failed."); + result = RT_UNKNOWN_ERROR; + } + rtReleaseAIO(write_handles[i]); + } + } +out: + rtUnlockWrite(&_namespace.lock); + return result; +} \ No newline at end of file diff --git a/src/runtime/resources.h b/src/runtime/resources.h new file mode 100644 index 0000000..6bee8d6 --- /dev/null +++ b/src/runtime/resources.h @@ -0,0 +1,72 @@ +#ifndef RT_RESOURCES_H +#define RT_RESOURCES_H + +/* Resource system interface + * + * To differentiate the two ideas, we called processed assets "resources" + * and the source files "assets". + * + * For example a .pipeline file is an asset, while a compiled pipeline in + * a binary file is a resource. + * + * Furthermore, a single asset file might contain multiple resources, + * i.e. a single texture file might be turned into multiple resources for the different mip-levels. + */ + +#include + +#include "runtime.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Identifies a single resource + * + * This is a hash of the resource name. 
/* Identifies a single resource
 *
 * This is a hash of the resource name.
 */
typedef uint64_t rt_resource_id;

/* Marks "no resource". rtCreateResources never returns this value as an id;
 * a name that hashes to it gets a different id assigned. */
#define RT_INVALID_RESOURCE_ID 0u

typedef enum {
    /* Compiled shader code */
    RT_RESOURCE_SHADER,

    /* A pipeline state object */
    RT_RESOURCE_PIPELINE,
} rt_resource_type;

/* Capacities of the fixed-size id arrays inside rt_resource. */
#define RT_MAX_SUBRESOURCES          32
#define RT_MAX_RESOURCE_DEPENDENCIES 32

typedef struct {
    /* Points to the resource data. The size of which is determined by the type.
     * For resources held by the resource manager's cache, the data is stored directly
     * behind this struct in the same buffer. */
    void *data;
    rt_resource_type type;

    /* Subresources are necessary to complete the resource.
     * For example, a texture might contain different mip-levels as sub-resources. */
    uint32_t subresource_count;
    rt_resource_id subresources[RT_MAX_SUBRESOURCES];

    /* Dependencies reference distinct resources that are necessary to use this resource.
     * For example, a model file might depend on its materials */
    uint32_t dependency_count;
    rt_resource_id dependencies[RT_MAX_RESOURCE_DEPENDENCIES];
} rt_resource;
+ */ +RT_DLLEXPORT rt_result rtCreateResources(uint32_t count, + const char **names, + const rt_resource *resources, + rt_resource_id *ids); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 79fe30f..a080307 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -3,6 +3,7 @@ /* basic types and macros */ +#include #include #include @@ -22,7 +23,6 @@ extern "C" { #define RT_INLINE inline __attribute__((always_inline)) #endif - #define RT_UNUSED(x) ((void)sizeof((x))) #define RT_ARRAY_COUNT(x) (sizeof((x)) / sizeof((x)[0])) @@ -34,8 +34,9 @@ typedef unsigned int rt_result; /* Default result codes */ enum { - RT_SUCCESS = 0, + RT_SUCCESS = 0, RT_OUT_OF_MEMORY = 1, + RT_INVALID_VALUE = 2, RT_CUSTOM_ERROR_START, @@ -47,6 +48,13 @@ typedef struct { unsigned int length; } rt_text_span; +/* snprintf replacement. + * Always returns a zero terminated string. + */ +RT_DLLEXPORT int rtSPrint(char *dest, size_t n, const char *fmt, ...); + +RT_DLLEXPORT int rtVSPrint(char *dest, size_t n, const char *fmt, va_list ap); + /* Returns results like strcmp(): * - If the first differing character is smaller in span than in cmp: < 0 * - If span and cmp are equal: 0 @@ -60,13 +68,35 @@ RT_DLLEXPORT void rtReportError(const char *subsystem, const char *fmt, ...); RT_DLLEXPORT void rtLog(const char *subsystem, const char *fmt, ...); +#ifndef NDEBUG +#ifdef _MSC_VER +#define RT_DEBUGBREAK __debugbreak() +#elif defined(__clang__) && __has_builtin(__bultin_debugtrap) +#define RT_DEBUGBREAK __builtin_debugtrap() +#elif defined(__GNUC__) +#define RT_DEBUGBREAK __builtin_trap() +#endif + +RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char *file, int line); +#define RT_ASSERT(x, msg) \ + do { \ + if (!(x)) { \ + if (rtAssertHandler(#x, (msg), __FILE__, __LINE__) == 0) { \ + RT_DEBUGBREAK; \ + } \ + } \ + } while (0) +#else +#define RT_ASSERT(x, msg) RT_UNUSED(x) +#endif + enum { - RT_INVALID_UNICODE = 
RT_CUSTOM_ERROR_START, + RT_INVALID_UNICODE = RT_CUSTOM_ERROR_START, RT_INSUFFICIENT_BUFFER, }; /* Returns RT_SUCCESS if the string was successfully converted, - * RT_INVALID_UNICODE if invalid unicode characters were encountered or + * RT_INVALID_UNICODE if invalid unicode characters were encountered or * RT_INSUFFICIENT_BUFFER if the provided output buffer is too small */ RT_DLLEXPORT rt_result rtUTF8ToWStr(const char *utf8, wchar_t *wstr, size_t len); @@ -89,6 +119,15 @@ static RT_INLINE void *rtResolveRelptr(rt_relptr *ptr) { } } +static RT_INLINE const void *rtResolveConstRelptr(const rt_relptr *ptr) { + if (ptr->off != 0) { + const char *p = (const char *)ptr; + return (const void *)(p + (ptrdiff_t)ptr->off); + } else { + return NULL; + } +} + static RT_INLINE void rtSetRelptr(rt_relptr *ptr, void *target) { if (target) { char *p = (char *)ptr, *t = (char *)target; diff --git a/src/runtime/shader_compiler.h b/src/runtime/shader_compiler.h index bc9912a..67a11a6 100644 --- a/src/runtime/shader_compiler.h +++ b/src/runtime/shader_compiler.h @@ -4,23 +4,12 @@ #include "mem_arena.h" #include "runtime.h" +#include "renderer_api.h" + #ifdef __cplusplus extern "C" { #endif -typedef enum { - RT_SHADER_TYPE_INVALID, - RT_SHADER_TYPE_VULKAN, - - RT_SHADER_TYPE_COUNT, -} rt_shader_type; - -typedef enum { - RT_SHADER_STAGE_VERTEX, - RT_SHADER_STAGE_FRAGMENT, - RT_SHADER_STAGE_COMPUTE, -} rt_shader_stage; - typedef enum { RT_SHADER_OPTIMIZATION_NONE, RT_SHADER_OPTIMIZATION_SPEED, diff --git a/src/runtime/sprint.c b/src/runtime/sprint.c new file mode 100644 index 0000000..2dfd672 --- /dev/null +++ b/src/runtime/sprint.c @@ -0,0 +1,47 @@ +#include "runtime.h" + +#include + +#ifdef __SANITIZE_ADDRESS__ +/* stb_sprintf has issues with ASAN under msvc */ +#define ENABLE_STB_SPRINTF 0 +#else +#define ENABLE_STB_SPRINTF 1 +#endif + +#if ENABLE_STB_SPRINTF +#pragma warning(push, 0) +#define STB_SPRINTF_IMPLEMENTATION +#define STB_SPRINTF_STATIC +#include +#pragma warning(pop) 
+#else +#include +#endif + +RT_DLLEXPORT int rtSPrint(char *dest, size_t n, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); +#if ENABLE_STB_SPRINTF + int r = stbsp_vsnprintf(dest, (int)n, fmt, ap); +#else + int r = vsnprintf(dest, n, fmt, ap); + if (r >= (int)n) + dest[n - 1] = '\0'; +#endif + va_end(ap); + return r; +} + +RT_DLLEXPORT int rtVSPrint(char *dest, size_t n, const char *fmt, va_list ap) +{ +#if ENABLE_STB_SPRINTF + return stbsp_vsnprintf(dest, (int)n, fmt, ap); +#else + int r = vsnprintf(dest, n, fmt, ap); + if (r >= (int)n) + dest[n - 1] = '\0'; + return r; +#endif +} \ No newline at end of file