diff --git a/rtcore.h b/rtcore.h
index bdc4672..279393c 100644
--- a/rtcore.h
+++ b/rtcore.h
@@ -53,7 +53,15 @@
  * This assume x64, which is what I'm always using.
  * You still need to build with the appropriate compilation
  * flags (-march=avx2 etc. for gcc, /arch:AVX2 etc. for msvc) */
-#include <immintrin.h>
+#if defined(__has_include) && !defined(RTC_NO_SIMD)
+    #if !__has_include(<immintrin.h>)
+        #define RTC_NO_SIMD
+    #else
+        #include <immintrin.h>
+    #endif
+#elif !defined(RTC_NO_SIMD)
+    #include <immintrin.h>
+#endif
 /* MSVC additionally has intrin.h for microsoft specific intrinsics */
 #ifdef _MSC_VER
     #include <intrin.h>
@@ -128,6 +136,9 @@ typedef int32_t b32;
 
     }
     #define force_inline __forceinline
+#else
+    #define assert(x)
+    #define force_inline inline
 #endif
 
 /* Arena allocator */
@@ -241,58 +252,66 @@ RTC_API s8 ReadEntireFileS8(s8 path, arena *a);
 RTC_API b32 WriteEntireFile(s8 path, byte *data, isize length);
 
 /* Atomics */
-#if defined(__GNUC__) || defined(__clang__)
-    /* Atomic add */
-    #define AtomicAdd32(_addend, _val) __atomic_add_fetch((i32 *)_addend, _val, __ATOMIC_SEQ_CST)
-    #define AtomicAdd64(_addend, _val) __atomic_add_fetch((i64 *)_addend, _val, __ATOMIC_SEQ_CST)
-    #define AtomicStore(_ptr, _val) __atomic_store_n(_ptr, _val, __ATOMIC_SEQ_CST)
-    #define AtomicStoreRelease(_ptr, _val) __atomic_store_n(_ptr, _val, __ATOMIC_RELEASE)
-    #define AtomicLoad(_ptr) __atomic_load_n(_ptr, __ATOMIC_SEQ_CST)
-    #define AtomicLoadAcquire(_ptr) __atomic_load_n(_ptr, __ATOMIC_ACQUIRE)
-#elif defined(_MSC_VER)
-    #define AtomicAdd32(_addend, _val) _InterlockedExchangeAdd((volatile long *)_addend, _val)
-    #define AtomicAdd64(_addend, _val) _InterlockedExchangeAdd64((volatile __int64 *)_addend, _val)
-    #define AtomicStore(_ptr, _val) _InterlockedExchange((volatile long *)_ptr, _val)
-    #define AtomicStoreRelease(_ptr, _val) _InterlockedExchange_HLERelease(_ptr, _val)
-    #define AtomicLoad(_ptr) _InterlockedOr(_ptr, 0)
-    #define AtomicLoadAcquire(_ptr) _InterlockedOr_HLEAcquire(_ptr, 0)
+#if !defined(RTC_NO_ATOMICS)
+    #if defined(__GNUC__) || defined(__clang__)
+        /* Atomic add */
+        #define AtomicAdd32(_addend, _val) __atomic_add_fetch((i32 *)_addend, _val, __ATOMIC_SEQ_CST)
+        #define AtomicAdd64(_addend, _val) __atomic_add_fetch((i64 *)_addend, _val, __ATOMIC_SEQ_CST)
+        #define AtomicStore(_ptr, _val) __atomic_store_n(_ptr, _val, __ATOMIC_SEQ_CST)
+        #define AtomicStoreRelease(_ptr, _val) __atomic_store_n(_ptr, _val, __ATOMIC_RELEASE)
+        #define AtomicLoad(_ptr) __atomic_load_n(_ptr, __ATOMIC_SEQ_CST)
+        #define AtomicLoadAcquire(_ptr) __atomic_load_n(_ptr, __ATOMIC_ACQUIRE)
+    #elif defined(_MSC_VER)
+        #define AtomicAdd32(_addend, _val) _InterlockedExchangeAdd((volatile long *)_addend, _val)
+        #define AtomicAdd64(_addend, _val) _InterlockedExchangeAdd64((volatile __int64 *)_addend, _val)
+        #define AtomicStore(_ptr, _val) _InterlockedExchange((volatile long *)_ptr, _val)
+        #define AtomicStoreRelease(_ptr, _val) _InterlockedExchange_HLERelease(_ptr, _val)
+        #define AtomicLoad(_ptr) _InterlockedOr(_ptr, 0)
+        #define AtomicLoadAcquire(_ptr) _InterlockedOr_HLEAcquire(_ptr, 0)
+    #else
+        #define RTC_NO_ATOMICS
+    #endif
 #endif
 
 /* Aliases for intrinsics */
-#if defined(__GNUC__) || defined(__clang__)
-    #define CTZ32(ui) __builtin_ctz(ui)
-    #define CTZ64(ul) __builtin_ctzl(ul)
-    #define CLZ32(ui) __builtin_clz(ui)
-    #define CLZ64(ul) __builtin_clzl(ul)
-    #define PopCount32(_x) __builtin_popcount(_x)
-    #define PopCount64(_x) __builtin_popcountl(_x)
-#elif defined(_MSC_VER)
-static force_inline unsigned int CTZ32(u32 x)
-{
-    unsigned int index;
-    _BitScanReverse(&index, x);
-    return index;
-}
-static force_inline unsigned int CTZ64(u64 x)
-{
-    unsigned int index;
-    _BitScanReverse64(&index, x);
-    return index;
-}
-static force_inline unsigned int CLZ32(u32 x)
-{
-    unsigned int index;
-    _BitScanForward(&index, x);
-    return index;
-}
-static force_inline unsigned int CLZ64(u64 x)
-{
-    unsigned int index;
-    _BitScanForward64(&index, x);
-    return index;
-}
-    #define PopCount32(_x) __popcnt(_x)
-    #define PopCount64(_x) __popcnt64(_x)
+#if !defined(RTC_NO_INTRINSICS)
+    #if defined(__GNUC__) || defined(__clang__)
+        #define CTZ32(ui) __builtin_ctz(ui)
+        #define CTZ64(ul) __builtin_ctzl(ul)
+        #define CLZ32(ui) __builtin_clz(ui)
+        #define CLZ64(ul) __builtin_clzl(ul)
+        #define PopCount32(_x) __builtin_popcount(_x)
+        #define PopCount64(_x) __builtin_popcountl(_x)
+    #elif defined(_MSC_VER)
+        static force_inline unsigned int CTZ32(u32 x)
+        {
+            unsigned int index;
+            _BitScanForward(&index, x); /* index of lowest set bit == trailing zero count */
+            return index;
+        }
+        static force_inline unsigned int CTZ64(u64 x)
+        {
+            unsigned int index;
+            _BitScanForward64(&index, x);
+            return index;
+        }
+        static force_inline unsigned int CLZ32(u32 x)
+        {
+            unsigned int index;
+            _BitScanReverse(&index, x); /* index of highest set bit */
+            return 31 - index;
+        }
+        static force_inline unsigned int CLZ64(u64 x)
+        {
+            unsigned int index;
+            _BitScanReverse64(&index, x);
+            return 63 - index;
+        }
+        #define PopCount32(_x) __popcnt(_x)
+        #define PopCount64(_x) __popcnt64(_x)
+    #else
+        #define RTC_NO_INTRINSICS
+    #endif
 #endif
 
 /* Threading wrapper */
diff --git a/test.c b/test.c
index 4d3ed96..5a08bbf 100644
--- a/test.c
+++ b/test.c
@@ -6,8 +6,13 @@
 THREAD_FN(TestThread)
 {
     int *p = param;
+#ifndef RTC_NO_ATOMICS
     printf("Thread got param %d\n", AtomicLoad(p));
     AtomicStore(p, 42);
+#else
+    printf("Thread got param %d\n", *p);
+    *p = 42;
+#endif
     return 0;
 }
 
@@ -44,7 +49,11 @@ ThreadTest(void)
     int p = 32;
     thread *t = StartThread(TestThread, &p);
     JoinThread(t);
+#ifndef RTC_NO_ATOMICS
     return AtomicLoad(&p) == 42;
+#else
+    return p == 42;
+#endif
 }
 
 internal int
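
Note (not part of the patch above): with the new #else branches, a build on a compiler that is neither GCC/Clang nor MSVC ends up with RTC_NO_ATOMICS and RTC_NO_INTRINSICS defined but no replacement macros. The sketch below is one assumed way a consumer could supply portable bit-scan fallbacks in that case; the loop-based versions are slower than the intrinsics but give the same results for nonzero inputs.

/* Consumer-side fallback sketch, assuming rtcore.h has been included first. */
#include <stdint.h>
#include "rtcore.h"

#ifdef RTC_NO_INTRINSICS
static inline unsigned int CTZ32(uint32_t x)
{
    unsigned int n = 0;
    while ((x & 1u) == 0) { x >>= 1; n++; }  /* count low zero bits */
    return n;
}
static inline unsigned int CLZ32(uint32_t x)
{
    unsigned int n = 0;
    while ((x & 0x80000000u) == 0) { x <<= 1; n++; }  /* count high zero bits */
    return n;
}
static inline unsigned int PopCount32(uint32_t x)
{
    unsigned int n = 0;
    while (x) { x &= x - 1u; n++; }  /* clear one set bit per iteration */
    return n;
}
#endif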