diff --git a/benchmark/amd7950x_avx2/joint_grid.csv b/benchmark/amd7950x_avx2/joint_grid.csv index e9b31334b..113289ac2 100644 --- a/benchmark/amd7950x_avx2/joint_grid.csv +++ b/benchmark/amd7950x_avx2/joint_grid.csv @@ -1,9 +1,9 @@ threads,ms -1,2664.01 -2,1567.06 -3,1080.91 -4,872.047 -5,761.89 -6,675.953 -7,621.694 -8,574.929 +1,2565.3 +2,1533.85 +3,1053.01 +4,865.702 +5,748.989 +6,658.623 +7,613.633 +8,578.973 diff --git a/benchmark/amd7950x_avx2/large_pyramid.csv b/benchmark/amd7950x_avx2/large_pyramid.csv index cec353916..516f542c5 100644 --- a/benchmark/amd7950x_avx2/large_pyramid.csv +++ b/benchmark/amd7950x_avx2/large_pyramid.csv @@ -1,9 +1,9 @@ threads,ms -1,1570.29 -2,871.185 -3,613.11 -4,471.278 -5,401.635 -6,353.07 -7,311.721 -8,307.579 +1,1584.03 +2,841.862 +3,598.747 +4,471.697 +5,396.306 +6,347.983 +7,309.086 +8,308.832 diff --git a/benchmark/amd7950x_avx2/many_pyramids.csv b/benchmark/amd7950x_avx2/many_pyramids.csv index 1fa3ef3ff..6a0f4358b 100644 --- a/benchmark/amd7950x_avx2/many_pyramids.csv +++ b/benchmark/amd7950x_avx2/many_pyramids.csv @@ -1,9 +1,9 @@ threads,ms -1,2682.25 -2,1481.3 -3,977.918 -4,767.89 -5,605.041 -6,520.386 -7,446.211 -8,419.113 +1,2663.34 +2,1407.85 +3,934.506 +4,725.271 +5,590.288 +6,502.513 +7,422.693 +8,395.369 diff --git a/benchmark/amd7950x_avx2/rain.csv b/benchmark/amd7950x_avx2/rain.csv index ec696a51d..849ad421d 100644 --- a/benchmark/amd7950x_avx2/rain.csv +++ b/benchmark/amd7950x_avx2/rain.csv @@ -1,9 +1,9 @@ threads,ms -1,6819.14 -2,4114.75 -3,3104.05 -4,2562.53 -5,2182.8 -6,1946.25 -7,1759.5 -8,1628.84 +1,6526.9 +2,4017.74 +3,3052.28 +4,2493.5 +5,2149.12 +6,1911.45 +7,1735.97 +8,1625.71 diff --git a/benchmark/amd7950x_avx2/smash.csv b/benchmark/amd7950x_avx2/smash.csv index bc1a066eb..3dd380467 100644 --- a/benchmark/amd7950x_avx2/smash.csv +++ b/benchmark/amd7950x_avx2/smash.csv @@ -1,9 +1,9 @@ threads,ms -1,1722.58 -2,1081.59 -3,812.772 -4,673.528 -5,591.922 -6,536.279 -7,502.897 -8,479.575 +1,1562.06 +2,1020.55 +3,781.193 +4,661.318 +5,580.115 +6,530.502 +7,489.672 +8,467.998 diff --git a/benchmark/amd7950x_avx2/spinner.csv b/benchmark/amd7950x_avx2/spinner.csv index f9d9f6160..091eb9040 100644 --- a/benchmark/amd7950x_avx2/spinner.csv +++ b/benchmark/amd7950x_avx2/spinner.csv @@ -1,9 +1,9 @@ threads,ms -1,4341.12 -2,2694.07 -3,2050.24 -4,1641.69 -5,1442.97 -6,1286.99 -7,1175.04 -8,1121.46 +1,4106.89 +2,2635.21 +3,1986.32 +4,1614.21 +5,1420.63 +6,1272.06 +7,1157.16 +8,1091.25 diff --git a/benchmark/amd7950x_avx2/tumbler.csv b/benchmark/amd7950x_avx2/tumbler.csv index d2febcfeb..1f68a4ca1 100644 --- a/benchmark/amd7950x_avx2/tumbler.csv +++ b/benchmark/amd7950x_avx2/tumbler.csv @@ -1,9 +1,9 @@ threads,ms -1,1743.49 -2,1096.53 -3,824.097 -4,675.961 -5,592.045 -6,526.278 -7,489.226 -8,467.981 +1,1613.69 +2,1061.88 +3,810.738 +4,670.874 +5,578.959 +6,519.473 +7,479.842 +8,437.851 diff --git a/src/atomic.h b/src/atomic.h index 19457780c..139a91926 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -3,82 +3,77 @@ #pragma once -#include - -// Compare to SDL_AtomicInt - -struct b3AtomicInt -{ - int value; -}; +#include "core.h" -struct b3AtomicU32 -{ - uint32_t value; -}; +#include #if defined( _MSC_VER ) - #include - -static inline void b3AtomicStoreInt( b2AtomicInt* obj, int desired ) -{ - _InterlockedExchange( (volatile long*)obj, (long)desired ); -} - -static inline int b3AtomicLoadInt( volatile int* obj ) -{ - return _InterlockedOr( (volatile long*)obj, 0 ); -} - -static inline void b3AtomicStoreUInt( volatile uint32_t* obj, uint32_t desired ) -{ - _InterlockedExchange( (volatile long*)obj, (long)desired ); -} +#include +#endif -static inline int b3AtomicFetchAddInt( volatile int* obj, int increment ) +static inline void b2AtomicStoreInt( b2AtomicInt* a, int value ) { - return _InterlockedExchangeAdd( (volatile long*)obj, (long)increment ); +#if defined( _MSC_VER ) + (void)_InterlockedExchange( (long*)&a->value, value ); +#elif defined( __GNUC__ ) || defined( __clang__ ) + __atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST ); +#else +#error "Unsupported platform" +#endif } -static inline bool b3AtomicCompareExchangeInt( volatile int* obj, int* expected, int desired ) +static inline int b2AtomicLoadInt( b2AtomicInt* a ) { - int original = _InterlockedCompareExchange( (volatile long*)obj, (long)desired, (long)*expected ); - if ( original == *expected ) - { - return true; - } - - *expected = original; - return false; -} - +#if defined( _MSC_VER ) + return _InterlockedOr( (long*)&a->value, 0 ); #elif defined( __GNUC__ ) || defined( __clang__ ) - -static inline void b3AtomicStoreInt( volatile int* obj, int desired ) -{ - __atomic_store_n( obj, desired, __ATOMIC_SEQ_CST ); + return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST ); +#else +#error "Unsupported platform" +#endif } -static inline int b3AtomicLoadInt( volatile int* obj ) +static inline int b2AtomicFetchAddInt( b2AtomicInt* a, int increment ) { - return __atomic_load_n( obj, __ATOMIC_SEQ_CST ); +#if defined( _MSC_VER ) + return _InterlockedExchangeAdd( (long*)&a->value, (long)increment ); +#elif defined( __GNUC__ ) || defined( __clang__ ) + return __atomic_fetch_add( &a->value, increment, __ATOMIC_SEQ_CST ); +#else +#error "Unsupported platform" +#endif } -static inline void b3AtomicStoreUInt( volatile uint32_t* obj, uint32_t desired ) +static inline bool b2AtomicCompareExchangeInt( b2AtomicInt* a, int expected, int desired ) { - __atomic_store_n( obj, desired, __ATOMIC_SEQ_CST ); +#if defined( _MSC_VER ) + return _InterlockedCompareExchange( (long*)&a->value, (long)desired, (long)expected ) == expected; +#elif defined( __GNUC__ ) || defined( __clang__ ) + // The value written to expected is ignored + return __atomic_compare_exchange_n( &a->value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ); +#else +#error "Unsupported platform" +#endif } -static inline int b3AtomicFetchAddInt( volatile int* obj, int arg ) +static inline void b2AtomicStoreU32( b2AtomicU32* a, uint32_t value ) { - return __atomic_fetch_add( obj, arg, __ATOMIC_SEQ_CST ); +#if defined( _MSC_VER ) + (void)_InterlockedExchange( (long*)&a->value, value ); +#elif defined( __GNUC__ ) || defined( __clang__ ) + __atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST ); +#else +#error "Unsupported platform" +#endif } -static inline bool b3AtomicCompareExchangeInt( volatile int* obj, int* expected, int desired ) +static inline uint32_t b2AtomicLoadU32( b2AtomicU32* a ) { - return __atomic_compare_exchange_n( obj, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ); -} - +#if defined( _MSC_VER ) + return (uint32_t)_InterlockedOr( (long*)&a->value, 0 ); +#elif defined( __GNUC__ ) || defined( __clang__ ) + return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST ); #else - #error "Unsupported platform" +#error "Unsupported platform" #endif +} diff --git a/src/broad_phase.c b/src/broad_phase.c index e898f13b2..5801ad10b 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -9,6 +9,7 @@ #include "aabb.h" #include "array.h" +#include "atomic.h" #include "body.h" #include "contact.h" #include "core.h" diff --git a/src/core.c b/src/core.c index 594e8416e..4e39eb5c4 100644 --- a/src/core.c +++ b/src/core.c @@ -27,7 +27,7 @@ #endif -#include "box2d/math_functions.h" +#include "atomic.h" // This allows the user to change the length units at runtime float b2_lengthUnitsPerMeter = 1.0f; @@ -71,6 +71,7 @@ b2Version b2GetVersion( void ) return ( b2Version ){ 3, 1, 0 }; } +#if 0 #if defined( _MSC_VER ) #include #endif @@ -141,6 +142,7 @@ uint32_t b2AtomicLoadU32( b2AtomicU32* a ) #error "Unsupported platform" #endif } +#endif static b2AllocFcn* b2_allocFcn = NULL; static b2FreeFcn* b2_freeFcn = NULL; diff --git a/src/core.h b/src/core.h index fbdb134fd..5f540ce18 100644 --- a/src/core.h +++ b/src/core.h @@ -142,6 +142,7 @@ typedef struct b2AtomicU32 uint32_t value; } b2AtomicU32; +#if 0 void b2AtomicStoreInt( b2AtomicInt* a, int value ); int b2AtomicLoadInt( b2AtomicInt* a ); int b2AtomicFetchAddInt( b2AtomicInt* a, int increment ); @@ -149,3 +150,4 @@ bool b2AtomicCompareExchangeInt( b2AtomicInt* obj, int expected, int desired ); void b2AtomicStoreU32( b2AtomicU32* a, uint32_t value ); uint32_t b2AtomicLoadU32( b2AtomicU32* a ); +#endif diff --git a/src/solver.c b/src/solver.c index dc179f108..81e670807 100644 --- a/src/solver.c +++ b/src/solver.c @@ -4,6 +4,7 @@ #include "solver.h" #include "array.h" +#include "atomic.h" #include "bitset.h" #include "body.h" #include "contact.h" diff --git a/src/table.c b/src/table.c index 0e22da9d6..00e238066 100644 --- a/src/table.c +++ b/src/table.c @@ -3,6 +3,7 @@ #include "table.h" +#include "atomic.h" #include "core.h" #include "ctz.h" diff --git a/test/test_table.c b/test/test_table.c index 721b9c786..2d33de78f 100644 --- a/test/test_table.c +++ b/test/test_table.c @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT +#include "atomic.h" #include "core.h" #include "ctz.h" #include "table.h" @@ -64,7 +65,7 @@ int TableTest( void ) #if B2_SNOOP_TABLE_COUNTERS extern b2AtomicInt b2_probeCount; - b2AtomicStoreInt(&b2_probeCount, 0); + b2AtomicStoreInt( &b2_probeCount, 0 ); #endif // Test key search