Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Optimizer (float arithmetic and more) #472

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions amxmodx/AMBuilder
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,17 @@ if builder.target_platform == 'mac':
binary.Dep('JIT/amxexecn-darwin.o'),
binary.Dep('JIT/amxjitsn-darwin.o'),
binary.Dep('JIT/natives-darwin-x86.o'),
binary.Dep('JIT/helpers-darwin-x86.o'),
]
elif builder.target_platform == 'linux':
jit_objects = [
binary.Dep('JIT/amxexecn.o'),
binary.Dep('JIT/amxjitsn.o'),
binary.Dep('JIT/natives-x86.o'),
binary.Dep('JIT/helpers-x86.o'),
]
elif builder.target_platform == 'windows':
jit_objects = [
binary.Dep('JIT/amxexecn.obj'),
binary.Dep('JIT/amxjitsn.obj'),
binary.Dep('JIT/helpers-x86.obj'),
binary.Dep('JIT/natives-x86.obj'),
]

Expand Down Expand Up @@ -74,6 +71,7 @@ binary.sources = [
'newmenus.cpp',
'debugger.cpp',
'optimizer.cpp',
'cpuinfo.cpp',
'format.cpp',
'messages.cpp',
'libraries.cpp',
Expand Down
Binary file modified amxmodx/JIT/amxjitsn.obj
Binary file not shown.
121 changes: 51 additions & 70 deletions amxmodx/amxjitsn.asm
Original file line number Diff line number Diff line change
Expand Up @@ -1914,106 +1914,87 @@ OP_BREAK:
OP_FLOAT_MUL:
GO_ON j_float_mul, OP_FLOAT_DIV
j_float_mul:
fld dword [esi+4]
fmul dword [esi+8]
sub esp, 4
fstp dword [esp]
pop eax
movss xmm0, dword [esi+4]
mulss xmm0, dword [esi+8]
movd eax, xmm0
CHECKCODESIZE j_float_mul

OP_FLOAT_DIV:
GO_ON j_float_div, OP_FLOAT_ADD
j_float_div:
fld dword [esi+4]
fdiv dword [esi+8]
sub esp, 4
fstp dword [esp]
pop eax
movss xmm0, dword [esi+4]
divss xmm0, dword [esi+8]
movd eax, xmm0
CHECKCODESIZE j_float_div

OP_FLOAT_ADD:
GO_ON j_float_add, OP_FLOAT_SUB
j_float_add:
fld dword [esi+4]
fadd dword [esi+8]
sub esp, 4
fstp dword [esp]
pop eax
movss xmm0, dword [esi+4]
addss xmm0, dword [esi+8]
movd eax, xmm0
CHECKCODESIZE j_float_add

OP_FLOAT_SUB:
GO_ON j_float_sub, OP_FLOAT_TO
j_float_sub:
fld dword [esi+4]
fsub dword [esi+8]
sub esp, 4
fstp dword [esp]
pop eax
movss xmm0, dword [esi+4]
subss xmm0, dword [esi+8]
movd eax, xmm0
CHECKCODESIZE j_float_sub

OP_FLOAT_TO:
GO_ON j_float_to, OP_FLOAT_ROUND
j_float_to:
fild dword [esi+4]
sub esp, 4
fstp dword [esp]
pop eax
cvtsi2ss xmm0, dword [esi+4]
movd eax, xmm0
CHECKCODESIZE j_float_to

OP_FLOAT_ROUND:
GO_ON j_float_round, OP_FLOAT_CMP
j_float_round:
;get the float control word
push 0
mov ebp,esp
fstcw [ebp]
mov eax,[ebp]
push eax
;clear the top bits
xor ah,ah
;get the control method
push edx
mov edx,[esi+8]
and edx,3 ;sanity check
shl edx,2 ;shift it to right position
;set the bits
or ah,dl ;set bits 15,14 of FCW to rounding method
or ah,3 ;set precision to 64bit

;calculate
sub esp,4
fld dword [esi+4]
test edx,edx
jnz .skip_correct
;nearest mode
;correct so as to AVOID bankers rounding
or ah, 4 ;set rounding mode to floor
fadd dword [g_round_nearest]

.skip_correct:
mov [ebp], eax
fldcw [ebp]
frndint
fistp dword [esp]
pop eax
.done:
pop edx
;restore bits
pop ebp
mov [esp], ebp
fldcw [esp]
pop ebp
cmp dword [esi+8], 0
jne .Floor
;if (arg2 == 0) ROUND
;{
cvtss2si eax, dword [esi+4]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct but breaks compatibility. Oddly, amxmodx doesn't do regular "banker's rounding", but instead does basically floor(x + 0.5). Therefore, without this change, rounding 2.5 gives you 3, and with your change rounding 2.5 gives you 2.

Personally, I wouldn't mind breaking compatibility here, but considering this could easily break some plugins, Arkshine and others might not agree with this change, and they'd probably be right.

See this PR for some extra info.

;}
jmp .Done
.Floor:
cmp dword [esi+8], 1
jne .Ceil
;else if (arg2 == 1) FLOOR
;{
cvttss2si eax, dword [esi+4]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorrect code. floor(-10.0) results in -11.

mov ebp, dword [esi+4]
shr ebp, 31
sub eax, ebp
;}
jmp .Done
.Ceil:
cmp dword [esi+8], 2
jne .Zero
;else if (arg2 == 2) CEIL
;{
movss xmm0, dword [esi+4]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also incorrect. You're basically doing round(x + 0.5) which is not equivalent to ceil rounding. This way ceil(1.0f) results in 2.

addss xmm0, dword [g_round_nearest]
cvtss2si eax, xmm0
;}
jmp .Done
.Zero:
;else ZERO
;{
cvttss2si eax, dword [esi+4]
;}
.Done:
CHECKCODESIZE j_float_round

OP_FLOAT_CMP:
GO_ON j_float_cmp, OP_INVALID
j_float_cmp:
fld dword [esi+8]
fld dword [esi+4]
fucompp
fnstsw ax
fwait
sahf
movss xmm0, dword [esi+8]
movss xmm1, dword [esi+4]
ucomiss xmm1, xmm0
cmovz eax, [g_flagsjit+4]
cmova eax, [g_flagsjit+8]
cmovb eax, [g_flagsjit+0]
Expand Down
1 change: 1 addition & 0 deletions amxmodx/amxxlog.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#ifndef __AMXXLOG_H__
#define __AMXXLOG_H__


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This boosts performance by 1000%

class CLog
{
private:
Expand Down
100 changes: 100 additions & 0 deletions amxmodx/cpuinfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include "cpuinfo.h"


#ifdef _WIN32

// Windows
#include <intrin.h>
#define cpuid(info, x) __cpuidex(info, x, 0)

#else

// GCC Intrinsics
#include <cpuid.h>
// Executes CPUID for leaf `InfoType` with the sub-leaf (ECX input) fixed at 0
// and stores the four result registers in `info` in the order
// EAX, EBX, ECX, EDX.
void cpuid(int info[4], int InfoType)
{
	int regEax;
	int regEbx;
	int regEcx;
	int regEdx;

	__cpuid_count(InfoType, 0, regEax, regEbx, regEcx, regEdx);

	info[0] = regEax;
	info[1] = regEbx;
	info[2] = regEcx;
	info[3] = regEdx;
}

#endif

// CPU Info
// Returns true when bit `bit` (0-31) of a CPUID output register is set.
// The register is reinterpreted as unsigned first so that probing bit 31
// never shifts a 1 into the sign bit of an int.
static bool cpuid_bit_set(int reg, unsigned bit)
{
	return ((static_cast<unsigned>(reg) >> bit) & 1u) != 0;
}

// Queries CPUID and caches the feature flags exposed by the has_*() accessors.
//
// Every flag starts out false and is only raised when the CPUID leaf that
// reports it is available, so a CPU lacking leaf 1, leaf 7 or extended leaf
// 0x80000001 yields well-defined "not supported" answers instead of
// uninitialised members.
//
// NOTE(review): the AVX / AVX-512 flags reflect what the CPU advertises only;
// nothing here checks that the OS has enabled the matching register state.
CPUInfo::CPUInfo()
	: m_has_cmov(false)
	, m_has_mmx(false)
	, m_has_sse(false)
	, m_has_sse2(false)
	, m_has_sse3(false)
	, m_has_ssse3(false)
	, m_has_sse4_1(false)
	, m_has_sse4_2(false)
	, m_has_clmul(false)
	, m_has_fma3(false)
	, m_has_aes(false)
	, m_has_avx(false)
	, m_has_avx2(false)
	, m_has_f16c(false)
	, m_has_rdrand(false)
	, m_has_xop(false)
	, m_has_avx512F(false)
	, m_has_avx512CD(false)
	, m_has_avx512PF(false)
	, m_has_avx512ER(false)
	, m_has_avx512VL(false)
	, m_has_avx512BW(false)
	, m_has_avx512DQ(false)
	, m_has_avx512IFMA(false)
	, m_has_avx512VBMI(false)
{
	// info[0..3] receive EAX, EBX, ECX, EDX of the queried leaf.
	int info[4];

	// Leaf 0: EAX holds the highest supported basic leaf.
	cpuid(info, 0);
	const int nIds = info[0];

	// Leaf 0x80000000: EAX holds the highest supported extended leaf.
	cpuid(info, 0x80000000);
	const unsigned nExIds = static_cast<unsigned>(info[0]);

	// Leaf 1: baseline feature bits in EDX (info[3]) and ECX (info[2]).
	if (nIds >= 0x00000001)
	{
		cpuid(info, 0x00000001);

		m_has_cmov   = cpuid_bit_set(info[3], 15);
		m_has_mmx    = cpuid_bit_set(info[3], 23);

		m_has_sse    = cpuid_bit_set(info[3], 25);
		m_has_sse2   = cpuid_bit_set(info[3], 26);
		m_has_sse3   = cpuid_bit_set(info[2], 0);
		m_has_ssse3  = cpuid_bit_set(info[2], 9);
		m_has_sse4_1 = cpuid_bit_set(info[2], 19);
		m_has_sse4_2 = cpuid_bit_set(info[2], 20);

		m_has_clmul  = cpuid_bit_set(info[2], 1);
		m_has_fma3   = cpuid_bit_set(info[2], 12);
		m_has_aes    = cpuid_bit_set(info[2], 25);
		m_has_avx    = cpuid_bit_set(info[2], 28);
		m_has_f16c   = cpuid_bit_set(info[2], 29);
		m_has_rdrand = cpuid_bit_set(info[2], 30);
	}

	// Leaf 7 (sub-leaf 0): structured extended features in EBX (info[1])
	// and ECX (info[2]).
	if (nIds >= 0x00000007)
	{
		cpuid(info, 0x00000007);

		m_has_avx2       = cpuid_bit_set(info[1], 5);
		m_has_avx512F    = cpuid_bit_set(info[1], 16);
		m_has_avx512CD   = cpuid_bit_set(info[1], 28);
		m_has_avx512PF   = cpuid_bit_set(info[1], 26);
		m_has_avx512ER   = cpuid_bit_set(info[1], 27);
		m_has_avx512VL   = cpuid_bit_set(info[1], 31);
		m_has_avx512BW   = cpuid_bit_set(info[1], 30);
		m_has_avx512DQ   = cpuid_bit_set(info[1], 17);
		m_has_avx512IFMA = cpuid_bit_set(info[1], 21);
		m_has_avx512VBMI = cpuid_bit_set(info[2], 1);
	}

	// Extended leaf 0x80000001: XOP is reported in ECX (info[2]).
	if (nExIds >= 0x80000001)
	{
		cpuid(info, 0x80000001);

		m_has_xop = cpuid_bit_set(info[2], 11);
	}
}


bool CPUInfo::has_cmov() const { return m_has_cmov; }
bool CPUInfo::has_mmx() const { return m_has_mmx; }
bool CPUInfo::has_sse() const { return m_has_sse; }
bool CPUInfo::has_sse2() const { return m_has_sse2; }
bool CPUInfo::has_sse3() const { return m_has_sse3; }
bool CPUInfo::has_ssse3() const { return m_has_ssse3; }
bool CPUInfo::has_sse4_1() const { return m_has_sse4_1; }
bool CPUInfo::has_sse4_2() const { return m_has_sse4_2; }
bool CPUInfo::has_clmul() const { return m_has_clmul; }
bool CPUInfo::has_fma3() const { return m_has_fma3; }
bool CPUInfo::has_aes() const { return m_has_aes; }
bool CPUInfo::has_avx() const { return m_has_avx; }
bool CPUInfo::has_avx2() const { return m_has_avx2; }
bool CPUInfo::has_f16c() const { return m_has_f16c; }
bool CPUInfo::has_rdrand() const { return m_has_rdrand; }
bool CPUInfo::has_xop() const { return m_has_xop; }
bool CPUInfo::has_avx512F() const { return m_has_avx512F; }
bool CPUInfo::has_avx512CD() const { return m_has_avx512CD; }
bool CPUInfo::has_avx512PF() const { return m_has_avx512PF; }
bool CPUInfo::has_avx512ER() const { return m_has_avx512ER; }
bool CPUInfo::has_avx512VL() const { return m_has_avx512VL; }
bool CPUInfo::has_avx512BW() const { return m_has_avx512BW; }
bool CPUInfo::has_avx512DQ() const { return m_has_avx512DQ; }
bool CPUInfo::has_avx512IFMA() const { return m_has_avx512IFMA; }
bool CPUInfo::has_avx512VBMI() const { return m_has_avx512VBMI; }

70 changes: 70 additions & 0 deletions amxmodx/cpuinfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#ifndef _INCLUDE_AMXMODX_CPUINFO_H
#define _INCLUDE_AMXMODX_CPUINFO_H

// Snapshot of x86 CPU feature flags.
//
// The constructor fills in the flags; afterwards the object is queried only
// through the const has_*() accessors declared below.
class CPUInfo
{
public:
	CPUInfo();

	// Baseline integer / MMX features.
	bool has_cmov() const;
	bool has_mmx() const;

	// SSE family.
	bool has_sse() const;
	bool has_sse2() const;
	bool has_sse3() const;
	bool has_ssse3() const;
	bool has_sse4_1() const;
	bool has_sse4_2() const;

	// Miscellaneous extensions.
	bool has_clmul() const;
	bool has_fma3() const;
	bool has_aes() const;
	bool has_avx() const;
	bool has_avx2() const;
	bool has_f16c() const;
	bool has_rdrand() const;
	bool has_xop() const;

	// AVX-512 subsets.
	bool has_avx512F() const;
	bool has_avx512CD() const;
	bool has_avx512PF() const;
	bool has_avx512ER() const;
	bool has_avx512VL() const;
	bool has_avx512BW() const;
	bool has_avx512DQ() const;
	bool has_avx512IFMA() const;
	bool has_avx512VBMI() const;

private:
	// One cached flag per accessor above, in the same order.
	bool m_has_cmov;
	bool m_has_mmx;

	bool m_has_sse;
	bool m_has_sse2;
	bool m_has_sse3;
	bool m_has_ssse3;
	bool m_has_sse4_1;
	bool m_has_sse4_2;

	bool m_has_clmul;
	bool m_has_fma3;
	bool m_has_aes;
	bool m_has_avx;
	bool m_has_avx2;
	bool m_has_f16c;
	bool m_has_rdrand;
	bool m_has_xop;

	bool m_has_avx512F;
	bool m_has_avx512CD;
	bool m_has_avx512PF;
	bool m_has_avx512ER;
	bool m_has_avx512VL;
	bool m_has_avx512BW;
	bool m_has_avx512DQ;
	bool m_has_avx512IFMA;
	bool m_has_avx512VBMI;
};


#endif //_INCLUDE_AMXMODX_CPUINFO_H
9 changes: 6 additions & 3 deletions amxmodx/meta_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ CLangMngr g_langMngr;
ke::AString g_log_dir;
ke::AString g_mod_name;
XVars g_xvars;
CPUInfo* g_cpuInfo;

bool g_bmod_tfc;
bool g_bmod_cstrike;
Expand Down Expand Up @@ -483,9 +484,11 @@ int C_Spawn(edict_t *pent)
// Set server flags
memset(g_players[0].flags, -1, sizeof(g_players[0].flags));

g_opt_level = atoi(get_localinfo("optimizer", "7"));
if (!g_opt_level)
g_opt_level = 7;
g_optimizerFlags = atoi(get_localinfo("optimizer", "7"));
if (!g_optimizerFlags)
g_optimizerFlags = OPT_FLOAT1_JIT | OPT_FLOAT2_JIT | OPT_VECTOR_JIT;

CheckOptimizerCPU();

// ###### Load AMX Mod X plugins
g_plugins.loadPluginsFromFile(get_localinfo("amxx_plugins", "addons/amxmodx/configs/plugins.ini"));
Expand Down
Loading