Skip to content

Commit

Permalink
inline atomics are faster
Browse files: browse the repository at this point in the history
  • Loading branch information
erincatto committed Jan 22, 2025
1 parent 69fb5a0 commit 03a8ad9
Show file tree
Hide file tree
Showing 14 changed files with 117 additions and 114 deletions.
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/joint_grid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,2664.01
2,1567.06
3,1080.91
4,872.047
5,761.89
6,675.953
7,621.694
8,574.929
1,2565.3
2,1533.85
3,1053.01
4,865.702
5,748.989
6,658.623
7,613.633
8,578.973
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/large_pyramid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,1570.29
2,871.185
3,613.11
4,471.278
5,401.635
6,353.07
7,311.721
8,307.579
1,1584.03
2,841.862
3,598.747
4,471.697
5,396.306
6,347.983
7,309.086
8,308.832
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/many_pyramids.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,2682.25
2,1481.3
3,977.918
4,767.89
5,605.041
6,520.386
7,446.211
8,419.113
1,2663.34
2,1407.85
3,934.506
4,725.271
5,590.288
6,502.513
7,422.693
8,395.369
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/rain.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,6819.14
2,4114.75
3,3104.05
4,2562.53
5,2182.8
6,1946.25
7,1759.5
8,1628.84
1,6526.9
2,4017.74
3,3052.28
4,2493.5
5,2149.12
6,1911.45
7,1735.97
8,1625.71
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/smash.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,1722.58
2,1081.59
3,812.772
4,673.528
5,591.922
6,536.279
7,502.897
8,479.575
1,1562.06
2,1020.55
3,781.193
4,661.318
5,580.115
6,530.502
7,489.672
8,467.998
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/spinner.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,4341.12
2,2694.07
3,2050.24
4,1641.69
5,1442.97
6,1286.99
7,1175.04
8,1121.46
1,4106.89
2,2635.21
3,1986.32
4,1614.21
5,1420.63
6,1272.06
7,1157.16
8,1091.25
16 changes: 8 additions & 8 deletions benchmark/amd7950x_avx2/tumbler.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,ms
1,1743.49
2,1096.53
3,824.097
4,675.961
5,592.045
6,526.278
7,489.226
8,467.981
1,1613.69
2,1061.88
3,810.738
4,670.874
5,578.959
6,519.473
7,479.842
8,437.851
107 changes: 51 additions & 56 deletions src/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,82 +3,77 @@

#pragma once

#include <stdint.h>

// Compare to SDL_AtomicInt

struct b3AtomicInt
{
int value;
};
#include "core.h"

struct b3AtomicU32
{
uint32_t value;
};
#include <stdint.h>

#if defined( _MSC_VER )
#include <intrin.h>

static inline void b3AtomicStoreInt( b2AtomicInt* obj, int desired )
{
_InterlockedExchange( (volatile long*)obj, (long)desired );
}

static inline int b3AtomicLoadInt( volatile int* obj )
{
return _InterlockedOr( (volatile long*)obj, 0 );
}

static inline void b3AtomicStoreUInt( volatile uint32_t* obj, uint32_t desired )
{
_InterlockedExchange( (volatile long*)obj, (long)desired );
}
#include <intrin.h>
#endif

static inline int b3AtomicFetchAddInt( volatile int* obj, int increment )
static inline void b2AtomicStoreInt( b2AtomicInt* a, int value )
{
return _InterlockedExchangeAdd( (volatile long*)obj, (long)increment );
#if defined( _MSC_VER )
(void)_InterlockedExchange( (long*)&a->value, value );
#elif defined( __GNUC__ ) || defined( __clang__ )
__atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#endif
}

static inline bool b3AtomicCompareExchangeInt( volatile int* obj, int* expected, int desired )
static inline int b2AtomicLoadInt( b2AtomicInt* a )
{
int original = _InterlockedCompareExchange( (volatile long*)obj, (long)desired, (long)*expected );
if ( original == *expected )
{
return true;
}

*expected = original;
return false;
}

#if defined( _MSC_VER )
return _InterlockedOr( (long*)&a->value, 0 );
#elif defined( __GNUC__ ) || defined( __clang__ )

static inline void b3AtomicStoreInt( volatile int* obj, int desired )
{
__atomic_store_n( obj, desired, __ATOMIC_SEQ_CST );
return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#endif
}

static inline int b3AtomicLoadInt( volatile int* obj )
static inline int b2AtomicFetchAddInt( b2AtomicInt* a, int increment )
{
return __atomic_load_n( obj, __ATOMIC_SEQ_CST );
#if defined( _MSC_VER )
return _InterlockedExchangeAdd( (long*)&a->value, (long)increment );
#elif defined( __GNUC__ ) || defined( __clang__ )
return __atomic_fetch_add( &a->value, increment, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#endif
}

static inline void b3AtomicStoreUInt( volatile uint32_t* obj, uint32_t desired )
static inline bool b2AtomicCompareExchangeInt( b2AtomicInt* a, int expected, int desired )
{
__atomic_store_n( obj, desired, __ATOMIC_SEQ_CST );
#if defined( _MSC_VER )
return _InterlockedCompareExchange( (long*)&a->value, (long)desired, (long)expected ) == expected;
#elif defined( __GNUC__ ) || defined( __clang__ )
// expected is a by-value copy, so the current value written back to it on failure is discarded
return __atomic_compare_exchange_n( &a->value, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#endif
}

static inline int b3AtomicFetchAddInt( volatile int* obj, int arg )
static inline void b2AtomicStoreU32( b2AtomicU32* a, uint32_t value )
{
return __atomic_fetch_add( obj, arg, __ATOMIC_SEQ_CST );
#if defined( _MSC_VER )
(void)_InterlockedExchange( (long*)&a->value, value );
#elif defined( __GNUC__ ) || defined( __clang__ )
__atomic_store_n( &a->value, value, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#endif
}

static inline bool b3AtomicCompareExchangeInt( volatile int* obj, int* expected, int desired )
static inline uint32_t b2AtomicLoadU32( b2AtomicU32* a )
{
return __atomic_compare_exchange_n( obj, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
}

#if defined( _MSC_VER )
return (uint32_t)_InterlockedOr( (long*)&a->value, 0 );
#elif defined( __GNUC__ ) || defined( __clang__ )
return __atomic_load_n( &a->value, __ATOMIC_SEQ_CST );
#else
#error "Unsupported platform"
#error "Unsupported platform"
#endif
}
1 change: 1 addition & 0 deletions src/broad_phase.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "aabb.h"
#include "array.h"
#include "atomic.h"
#include "body.h"
#include "contact.h"
#include "core.h"
Expand Down
4 changes: 3 additions & 1 deletion src/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#endif

#include "box2d/math_functions.h"
#include "atomic.h"

// This allows the user to change the length units at runtime
float b2_lengthUnitsPerMeter = 1.0f;
Expand Down Expand Up @@ -71,6 +71,7 @@ b2Version b2GetVersion( void )
return ( b2Version ){ 3, 1, 0 };
}

#if 0
#if defined( _MSC_VER )
#include <intrin.h>
#endif
Expand Down Expand Up @@ -141,6 +142,7 @@ uint32_t b2AtomicLoadU32( b2AtomicU32* a )
#error "Unsupported platform"
#endif
}
#endif

static b2AllocFcn* b2_allocFcn = NULL;
static b2FreeFcn* b2_freeFcn = NULL;
Expand Down
2 changes: 2 additions & 0 deletions src/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,12 @@ typedef struct b2AtomicU32
uint32_t value;
} b2AtomicU32;

#if 0
void b2AtomicStoreInt( b2AtomicInt* a, int value );
int b2AtomicLoadInt( b2AtomicInt* a );
int b2AtomicFetchAddInt( b2AtomicInt* a, int increment );
bool b2AtomicCompareExchangeInt( b2AtomicInt* obj, int expected, int desired );

void b2AtomicStoreU32( b2AtomicU32* a, uint32_t value );
uint32_t b2AtomicLoadU32( b2AtomicU32* a );
#endif
1 change: 1 addition & 0 deletions src/solver.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "solver.h"

#include "array.h"
#include "atomic.h"
#include "bitset.h"
#include "body.h"
#include "contact.h"
Expand Down
1 change: 1 addition & 0 deletions src/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "table.h"

#include "atomic.h"
#include "core.h"
#include "ctz.h"

Expand Down
3 changes: 2 additions & 1 deletion test/test_table.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: 2023 Erin Catto
// SPDX-License-Identifier: MIT

#include "atomic.h"
#include "core.h"
#include "ctz.h"
#include "table.h"
Expand Down Expand Up @@ -64,7 +65,7 @@ int TableTest( void )

#if B2_SNOOP_TABLE_COUNTERS
extern b2AtomicInt b2_probeCount;
b2AtomicStoreInt(&b2_probeCount, 0);
b2AtomicStoreInt( &b2_probeCount, 0 );
#endif

// Test key search
Expand Down

0 comments on commit 03a8ad9

Please sign in to comment.