diff --git a/benchmark/tumbler.c b/benchmark/tumbler.c index fcfb77d3c..a501aa643 100644 --- a/benchmark/tumbler.c +++ b/benchmark/tumbler.c @@ -24,13 +24,13 @@ b2WorldId Tumbler( b2WorldDef* worldDef ) shapeDef.density = 50.0f; b2Polygon polygon; - polygon = b2MakeOffsetBox( 0.5f, 10.0f, ( b2Vec2 ){ 10.0f, 0.0f }, 0.0 ); + polygon = b2MakeOffsetBox( 0.5f, 10.0f, ( b2Vec2 ){ 10.0f, 0.0f }, b2Rot_identity ); b2CreatePolygonShape( bodyId, &shapeDef, &polygon ); - polygon = b2MakeOffsetBox( 0.5f, 10.0f, ( b2Vec2 ){ -10.0f, 0.0f }, 0.0 ); + polygon = b2MakeOffsetBox( 0.5f, 10.0f, ( b2Vec2 ){ -10.0f, 0.0f }, b2Rot_identity ); b2CreatePolygonShape( bodyId, &shapeDef, &polygon ); - polygon = b2MakeOffsetBox( 10.0f, 0.5f, ( b2Vec2 ){ 0.0f, 10.0f }, 0.0 ); + polygon = b2MakeOffsetBox( 10.0f, 0.5f, ( b2Vec2 ){ 0.0f, 10.0f }, b2Rot_identity ); b2CreatePolygonShape( bodyId, &shapeDef, &polygon ); - polygon = b2MakeOffsetBox( 10.0f, 0.5f, ( b2Vec2 ){ 0.0f, -10.0f }, 0.0 ); + polygon = b2MakeOffsetBox( 10.0f, 0.5f, ( b2Vec2 ){ 0.0f, -10.0f }, b2Rot_identity ); b2CreatePolygonShape( bodyId, &shapeDef, &polygon ); float motorSpeed = 25.0f; diff --git a/include/box2d/base.h b/include/box2d/base.h index 7e6b8f5f8..f622bea42 100644 --- a/include/box2d/base.h +++ b/include/box2d/base.h @@ -5,23 +5,24 @@ #include +// clang-format off +// // Shared library macros #if defined( _MSC_VER ) && defined( box2d_EXPORTS ) // build the Windows DLL #define BOX2D_EXPORT __declspec( dllexport ) #elif defined( _MSC_VER ) && defined( BOX2D_DLL ) -// using the Windows DLL + // using the Windows DLL #define BOX2D_EXPORT __declspec( dllimport ) #elif defined( box2d_EXPORTS ) -// building or using the Box2D shared library + // building or using the Box2D shared library #define BOX2D_EXPORT __attribute__( ( visibility( "default" ) ) ) #else -// static library + // static library #define BOX2D_EXPORT #endif // C++ macros -// clang-format off #ifdef __cplusplus #define B2_API extern "C" BOX2D_EXPORT #define B2_INLINE inline @@ -34,6 +35,43 @@ #define B2_LITERAL(T) (T) #define B2_ZERO_INIT {0} #endif + +// Define CPU +#if defined( __x86_64__ ) || defined( _M_X64 ) + #define B2_CPU_X64 +#elif defined( __aarch64__ ) || defined( _M_ARM64 ) + #define B2_CPU_ARM +#elif defined( __EMSCRIPTEN__ ) + #define B2_CPU_WASM +#else + #error Unsupported CPU +#endif + +// Define SIMD +#if defined( BOX2D_ENABLE_SIMD ) + #if defined( B2_CPU_X64 ) + #if defined( BOX2D_AVX2 ) + #define B2_SIMD_AVX2 + #define B2_SIMD_WIDTH 8 + #else + #define B2_SIMD_SSE2 + #define B2_SIMD_WIDTH 4 + #endif + #elif defined( B2_CPU_ARM ) + #define B2_SIMD_NEON + #define B2_SIMD_WIDTH 4 + #elif defined( B2_CPU_WASM ) + #define B2_SIMD_SSE2 + #define B2_SIMD_WIDTH 4 + #else + #define B2_SIMD_NONE + #define B2_SIMD_WIDTH 4 + #endif +#else + #define B2_SIMD_NONE + #define B2_SIMD_WIDTH 4 +#endif + // clang-format on /** diff --git a/include/box2d/math_functions.h b/include/box2d/math_functions.h index 88006bcee..acb30ecb6 100644 --- a/include/box2d/math_functions.h +++ b/include/box2d/math_functions.h @@ -5,9 +5,16 @@ #include "base.h" +#include #include #include +#if defined( B2_SIMD_AVX2 ) || defined( B2_SIMD_SSE2 ) +#include +#elif defined( B2_SIMD_NEON ) +#include +#endif + /** * @defgroup math Math * @brief Vector math types and functions @@ -239,20 +246,129 @@ B2_INLINE b2Vec2 b2Clamp( b2Vec2 v, b2Vec2 a, b2Vec2 b ) return c; } +#if 1 +B2_INLINE float b2Sqrt( float x ) +{ + //return sqrtf( x ); + +#if defined( B2_SIMD_AVX2 ) || defined( B2_SIMD_SSE2 ) + return _mm_cvtss_f32( _mm_sqrt_ss( _mm_set1_ps( x ) ) ); +#elif defined( B2_SIMD_NEON ) + float32x4_t v = vdupq_n_f32( x ); + return vgetq_lane_f32( vsqrtq_f32( v ), 0 ); +#else + return sqrtf( x ); +#endif +} + +B2_INLINE float b2Length( b2Vec2 v ) +{ + return b2Sqrt( v.x * v.x + v.y * v.y ); +} + +B2_INLINE float b2Distance( b2Vec2 a, b2Vec2 b ) +{ + float dx = b.x - a.x; + float dy = b.y - a.y; + return b2Sqrt( dx * dx + dy * dy ); +} + +B2_INLINE b2Vec2 b2Normalize( b2Vec2 v ) +{ + float length = b2Sqrt( v.x * v.x + v.y * v.y ); + if ( length < FLT_EPSILON ) + { + return b2Vec2_zero; + } + + float invLength = 1.0f / length; + b2Vec2 n = { invLength * v.x, invLength * v.y }; + return n; +} + +B2_INLINE b2Vec2 b2NormalizeChecked( b2Vec2 v ) +{ + float length = b2Length( v ); + if ( length < FLT_EPSILON ) + { + return b2Vec2_zero; + } + + float invLength = 1.0f / length; + b2Vec2 n = { invLength * v.x, invLength * v.y }; + return n; +} + +B2_INLINE b2Vec2 b2GetLengthAndNormalize( float* length, b2Vec2 v ) +{ + *length = b2Length( v ); + if ( *length < FLT_EPSILON ) + { + return b2Vec2_zero; + } + + float invLength = 1.0f / *length; + b2Vec2 n = { invLength * v.x, invLength * v.y }; + return n; +} + +B2_INLINE b2Rot b2NormalizeRot( b2Rot q ) +{ + float mag = b2Sqrt( q.s * q.s + q.c * q.c ); + float invMag = mag > 0.0 ? 1.0f / mag : 0.0f; + b2Rot qn = { q.c * invMag, q.s * invMag }; + return qn; +} + +B2_INLINE b2Rot b2IntegrateRotation( b2Rot q1, float deltaAngle ) +{ + // dc/dt = -omega * sin(t) + // ds/dt = omega * cos(t) + // c2 = c1 - omega * h * s1 + // s2 = s1 + omega * h * c1 + b2Rot q2 = { q1.c - deltaAngle * q1.s, q1.s + deltaAngle * q1.c }; + float mag = b2Sqrt( q2.s * q2.s + q2.c * q2.c ); + float invMag = mag > 0.0 ? 1.0f / mag : 0.0f; + b2Rot qn = { q2.c * invMag, q2.s * invMag }; + return qn; +} + +#else + B2_API float b2Sqrt( float x ); +/// Convert a vector into a unit vector if possible, otherwise returns the zero vector. +B2_API b2Vec2 b2Normalize( b2Vec2 v ); + +/// Convert a vector into a unit vector if possible, otherwise asserts. +B2_API b2Vec2 b2NormalizeChecked( b2Vec2 v ); + +/// Convert a vector into a unit vector if possible, otherwise returns the zero vector. Also +/// outputs the length. +B2_API b2Vec2 b2GetLengthAndNormalize( float* length, b2Vec2 v ); + /// Get the length of this vector (the norm) B2_API float b2Length( b2Vec2 v ); +/// Get the distance between two points +B2_API float b2Distance( b2Vec2 a, b2Vec2 b ); + +/// Integration rotation from angular velocity +/// @param q1 initial rotation +/// @param deltaAngle the angular displacement in radians +B2_API b2Rot b2IntegrateRotation( b2Rot q1, float deltaAngle ); + +/// Normalize rotation +B2_API b2Rot b2NormalizeRot( b2Rot q ); + +#endif + /// Get the length squared of this vector B2_INLINE float b2LengthSquared( b2Vec2 v ) { return v.x * v.x + v.y * v.y; } -/// Get the distance between two points -B2_API float b2Distance( b2Vec2 a, b2Vec2 b ); - /// Get the distance squared between points B2_INLINE float b2DistanceSquared( b2Vec2 a, b2Vec2 b ) { @@ -263,9 +379,6 @@ B2_INLINE float b2DistanceSquared( b2Vec2 a, b2Vec2 b ) /// Make a rotation using an angle in radians B2_API b2Rot b2MakeRot( float angle ); -/// Normalize rotation -B2_API b2Rot b2NormalizeRot( b2Rot q ); - /// Is this rotation normalized? B2_INLINE bool b2IsNormalized( b2Rot q ) { @@ -287,10 +400,6 @@ B2_INLINE b2Rot b2NLerp( b2Rot q1, b2Rot q2, float t ) return b2NormalizeRot( q ); } -/// Integration rotation from angular velocity -/// @param q1 initial rotation -/// @param deltaAngle the angular displacement in radians -B2_API b2Rot b2IntegrateRotation( b2Rot q1, float deltaAngle ); /// Compute the angular velocity necessary to rotate between two rotations over a give time /// @param q1 initial rotation @@ -537,16 +646,6 @@ B2_API bool b2Rot_IsValid( b2Rot q ); /// Is this a valid bounding box? Not Nan or infinity. Upper bound greater than or equal to lower bound. B2_API bool b2AABB_IsValid( b2AABB aabb ); -/// Convert a vector into a unit vector if possible, otherwise returns the zero vector. -B2_API b2Vec2 b2Normalize( b2Vec2 v ); - -/// Convert a vector into a unit vector if possible, otherwise asserts. -B2_API b2Vec2 b2NormalizeChecked( b2Vec2 v ); - -/// Convert a vector into a unit vector if possible, otherwise returns the zero vector. Also -/// outputs the length. -B2_API b2Vec2 b2GetLengthAndNormalize( float* length, b2Vec2 v ); - /// Box2D bases all length units on meters, but you may need different units for your game. /// You can set this value to use different units. This should be done at application startup /// and only modified once. Default value is 1. diff --git a/samples/sample_determinism.cpp b/samples/sample_determinism.cpp index 7ebc79e59..c37b574d0 100644 --- a/samples/sample_determinism.cpp +++ b/samples/sample_determinism.cpp @@ -81,7 +81,8 @@ class FallingHinges : public Sample b2BodyDef bodyDef = b2DefaultBodyDef(); bodyDef.type = b2_dynamicBody; - bodyDef.position = { x + offset * i, h + 2.0f * h * i }; + bodyDef.position.x = x + offset * i; + bodyDef.position.y = h + 2.0f * h * i; // this tests the deterministic cosine and sine functions bodyDef.rotation = b2MakeRot( 0.01f * i - 0.1f ); diff --git a/src/core.h b/src/core.h index 4e6aa27a4..a904d1296 100644 --- a/src/core.h +++ b/src/core.h @@ -39,55 +39,6 @@ #error Unsupported platform #endif -// Define CPU -#if defined( __x86_64__ ) || defined( _M_X64 ) -#define B2_CPU_X64 -#elif defined( __aarch64__ ) || defined( _M_ARM64 ) -#define B2_CPU_ARM -#elif defined( __EMSCRIPTEN__ ) -#define B2_CPU_WASM -#else -#error Unsupported CPU -#endif - -// Define SIMD -#if defined( BOX2D_ENABLE_SIMD ) - -#if defined( B2_CPU_X64 ) - -#if defined( BOX2D_AVX2 ) -#define B2_SIMD_AVX2 -#define B2_SIMD_WIDTH 8 -#else -#define B2_SIMD_SSE2 -#define B2_SIMD_WIDTH 4 -#endif - -#elif defined( B2_CPU_ARM ) - -#define B2_SIMD_NEON -#define B2_SIMD_WIDTH 4 - -#elif defined( __EMSCRIPTEN__ ) - -#define B2_CPU_WASM -#define B2_SIMD_SSE2 -#define B2_SIMD_WIDTH 4 - -#else - -#define B2_SIMD_NONE -#define B2_SIMD_WIDTH 4 - -#endif - -#else - -#define B2_SIMD_NONE -#define B2_SIMD_WIDTH 4 - -#endif - // Define compiler #if defined( __clang__ ) #define B2_COMPILER_CLANG diff --git a/src/math_functions.c b/src/math_functions.c index 147708695..d9d1d20ac 100644 --- a/src/math_functions.c +++ b/src/math_functions.c @@ -7,11 +7,11 @@ #include -#if defined( B2_CPU_X64 ) || defined( B2_CPU_WASM ) -#include -#else -#include -#endif +//#if defined( B2_CPU_X64 ) || defined( B2_CPU_WASM ) +//#include +//#else +//#include +//#endif bool b2IsValid( float a ) { @@ -58,16 +58,21 @@ bool b2Rot_IsValid( b2Rot q ) return b2IsNormalized( q ); } +#if 0 float b2Sqrt(float x) { +#if 1 return sqrtf( x ); - -//#if defined( B2_CPU_X64 ) || defined( B2_CPU_WASM ) -// return _mm_cvtss_f32(_mm_sqrt_ss( _mm_set1_ps(x) )); -//#else -// float32x4_t v = vdupq_n_f32( x ); -// return vgetq_lane_f32( vsqrtq_f32( v ), 0 ); -//#endif +#else +#if defined( B2_SIMD_AVX2 ) || defined( B2_SIMD_SSE2 ) + return _mm_cvtss_f32(_mm_sqrt_ss( _mm_set1_ps(x) )); +#elif defined( B2_SIMD_NEON ) + float32x4_t v = vdupq_n_f32( x ); + return vgetq_lane_f32( vsqrtq_f32( v ), 0 ); +#else + return sqrtf( x ); +#endif +#endif } float b2Length( b2Vec2 v ) @@ -142,6 +147,8 @@ b2Rot b2IntegrateRotation( b2Rot q1, float deltaAngle ) b2Rot qn = { q2.c * invMag, q2.s * invMag }; return qn; } +#endif + #if 0 // From diff --git a/test/test_determinism.c b/test/test_determinism.c index 3a57e2bfa..58b3278b4 100644 --- a/test/test_determinism.c +++ b/test/test_determinism.c @@ -258,6 +258,9 @@ static int CrossPlatformTest(void) bodyDef.position.x = x + offset * i; bodyDef.position.y = h + 2.0f * h * i; + // this tests the deterministic cosine and sine functions + bodyDef.rotation = b2MakeRot( 0.01f * i - 0.1f ); + b2BodyId bodyId = b2CreateBody( worldId, &bodyDef ); if ( ( i & 1 ) == 0 ) @@ -326,11 +329,10 @@ static int CrossPlatformTest(void) stepCount += 1; } + // sleep step = 316, hash = 0xd9b31433 ENSURE( stepCount < maxSteps ); - ENSURE( sleepStep == 295 ); - ENSURE( hash == 0x4a4d465c ); - //ENSURE( sleepStep == 378 ); - //ENSURE( hash == 0x2d569026 ); + ENSURE( sleepStep == 316 ); + ENSURE( hash == 0xd9b31433 ); free( bodies );