Skip to content

Commit

Permalink
Add calcN
Browse files Browse the repository at this point in the history
  • Loading branch information
zargarzadehm committed Nov 8, 2021
1 parent d46e316 commit 73f86a4
Show file tree
Hide file tree
Showing 22 changed files with 158 additions and 108 deletions.
12 changes: 6 additions & 6 deletions ErgoOpenCL.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
<IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
<IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
Expand All @@ -89,11 +89,11 @@
<Optimization>Disabled</Optimization>
<SDLCheck>false</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>C:\boost_1_55_0\boost_1_55_0;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost_1_55_0\boost_1_55_0;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>OpenCL.lib;libcurl.lib;libeay32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
Expand All @@ -120,13 +120,13 @@
<SDLCheck>false</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>OpenCL.lib;libcurl.lib;libeay32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
Expand Down
Binary file removed Hiveos/ergoopenclV2-2.0.tar.gz
Binary file not shown.
Binary file added Hiveos/ergoopenclV2-2.1.tar.gz
Binary file not shown.
28 changes: 17 additions & 11 deletions MiningKernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const __constant cl_ulong ivals[8] = {


__kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
const cl_uint n_len,

// precalculated hashes
global const cl_uint* hashes,
// intermediate Hashes
Expand Down Expand Up @@ -111,15 +113,16 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
//((uint8_t*)&h2)[5] = ((uint8_t*)r)[26];
//((uint8_t*)&h2)[6] = ((uint8_t*)r)[25];
//((uint8_t*)&h2)[7] = ((uint8_t*)r)[24];
h3 = h2 % n_len;

h3 = h2 % N_LEN;
//--------------------------read hash from lookup
//--------------------------read hash from lookup
cl_uint tmpL;
#pragma unroll 8
for (int i = 0; i < 8; ++i)
for (int i = 0; i < 32; ++i)
{
tmpL = hashes[(h3 << 3) + i];
reverseBytesInt(tmpL, r[7 - i]);
//tmpL = hashes[(h3 << 3) + i];
//reverseBytesInt(tmpL, r[7 - i]);
((uint8_t *)r)[31-i] = ((global uint8_t *)hashes)[h3 * 32 + i];
}
//------------------------------------------------------

Expand Down Expand Up @@ -180,6 +183,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
}

__kernel void BlockMiningStep2(
const cl_uint N_MASK,
// boundary for puzzle
global const cl_uint* bound,
// data: mes
Expand Down Expand Up @@ -238,16 +242,16 @@ __kernel void BlockMiningStep2(
((uint8_t *)r)[34] = ((uint8_t *)r)[2];
((uint8_t *)r)[35] = ((uint8_t *)r)[3];


#pragma unroll
for (int k = 0; k < K_LEN; k += 4)
{
ind[k] = r[k >> 2] & N_MASK;
ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) & N_MASK;
ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) & N_MASK;
ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) & N_MASK;
ind[k] = r[k >> 2] % N_MASK;
ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) % N_MASK;
ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) % N_MASK;
ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) % N_MASK;
}


//================================================================//
// Calculate result
//================================================================//
Expand Down Expand Up @@ -349,6 +353,7 @@ __kernel void BlockMiningStep2(
fn_Add(r[8], 0, CV, r[8], CV);
}



//--------------------hash(f)--------------------
//====================================================================//
Expand Down Expand Up @@ -398,7 +403,8 @@ __kernel void BlockMiningStep2(
//================================================================//
j = ((cl_ulong*)r)[3] < ((cl_ulong global*)bound)[3] || ((cl_ulong*)r)[3] == ((cl_ulong global*)bound)[3] && (((cl_ulong*)r)[2] < ((cl_ulong global*)bound)[2] || ((cl_ulong*)r)[2] == ((cl_ulong global*)bound)[2] && (((cl_ulong*)r)[1] < ((cl_ulong global*)bound)[1] || ((cl_ulong*)r)[1] == ((cl_ulong global*)bound)[1] && ((cl_ulong*)r)[0] < ((cl_ulong global*)bound)[0]));

if (j)//

if (j )//
{
cl_uint oldC = atomic_inc(vCount);

Expand Down
24 changes: 8 additions & 16 deletions OCLdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,14 @@ typedef unsigned __int32 cl_uint;
#define K_LEN 32

// N: number of precalculated hashes
#define N_LEN 0x4000000// must not be less than 64 (BLOCK_DIM); 0x4000000 == 2^26
// Parameters for a variable number of precalculated hashes.
// NOTE(review): presumably INIT_N_LEN is the starting value of N, MAX_N_LEN its
// upper bound, and the Increase* values are block heights that control when and
// how often N grows -- confirm against the host-side code that computes N.
#define INIT_N_LEN 0x4000000 // 2^26, same value as the fixed N_LEN above
#define MAX_N_LEN 0x7FC9FF98
#define IncreaseStart (600*1024) // 614400
#define IncreaseEnd (4198400) // == 4100*1024
#define IncreasePeriodForN (50*1024) // 51200



// Parenthesized so the sum stays a single operand wherever the macro expands
// (e.g. `Sol_Index * 2` or `x - Sol_Index` would otherwise mis-associate).
#define Sol_Index (0x3381BF + 10)
////////////////////////////////////////////////////////////////////////////////
// PARAMETERS: Heuristic prehash kernel parameters
Expand Down Expand Up @@ -206,17 +213,6 @@ struct ctx_t;
* BLOCK_DIM \
)

// Necessary workspace size: the larger of
//   (2 * N_LEN + 1) * INDEX_SIZE_8
// and
//   NONCES_PER_ITER * (NUM_SIZE_8 + 2 * INDEX_SIZE_8) + INDEX_SIZE_8.
// The (cl_uint) cast applies to ((N_LEN << 1) + 1) only, before the multiply.
#define WORKSPACE_SIZE_8 \
( \
( \
(cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8 \
> NONCES_PER_ITER * (NUM_SIZE_8 + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
)? \
(cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8: \
NONCES_PER_ITER * (NUM_SIZE_8 + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
)

//============================================================================//
// GPU shared memory
//============================================================================//
Expand All @@ -242,11 +238,7 @@ struct ctx_t;
#define ROUND_NC_SIZE_32 (NC_SIZE_32_BLOCK * BLOCK_DIM)

//============================================================================//
// Heuristic CUDA parameters
//============================================================================//
// mod 2^26 mask: `x & N_MASK` equals `x % N_LEN` only while N_LEN is a power of two
#define N_MASK (N_LEN - 1)

// number of threads per iteration
#define THREADS_PER_ITER (NONCES_PER_ITER / NONCES_PER_THREAD)

Expand Down
4 changes: 2 additions & 2 deletions PreHashKernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
__kernel void InitPrehash(
// data: height
const cl_uint h,
const cl_uint n_len,
// hashes
global cl_uint * hashes
)
Expand All @@ -100,7 +101,7 @@ __kernel void InitPrehash(

tid = get_global_id(0);

if (tid < N_LEN)
if (tid < n_len)
{
ulong h[8];
ulong b[16];
Expand Down Expand Up @@ -164,7 +165,6 @@ __kernel void InitPrehash(
//====================================================================//
#pragma unroll
for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210);

((__global uchar *)hashes)[tid * 32 + 31] = 0;

}
Expand Down
28 changes: 17 additions & 11 deletions Ubuntu/MiningKernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const __constant cl_ulong ivals[8] = {


__kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
const cl_uint n_len,

// precalculated hashes
global const cl_uint* hashes,
// intermediate Hashes
Expand Down Expand Up @@ -111,15 +113,16 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
//((uint8_t*)&h2)[5] = ((uint8_t*)r)[26];
//((uint8_t*)&h2)[6] = ((uint8_t*)r)[25];
//((uint8_t*)&h2)[7] = ((uint8_t*)r)[24];
h3 = h2 % n_len;

h3 = h2 % N_LEN;
//--------------------------read hash from lookup
//--------------------------read hash from lookup
cl_uint tmpL;
#pragma unroll 8
for (int i = 0; i < 8; ++i)
for (int i = 0; i < 32; ++i)
{
tmpL = hashes[(h3 << 3) + i];
reverseBytesInt(tmpL, r[7 - i]);
//tmpL = hashes[(h3 << 3) + i];
//reverseBytesInt(tmpL, r[7 - i]);
((uint8_t *)r)[31-i] = ((global uint8_t *)hashes)[h3 * 32 + i];
}
//------------------------------------------------------

Expand Down Expand Up @@ -180,6 +183,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
}

__kernel void BlockMiningStep2(
const cl_uint N_MASK,
// boundary for puzzle
global const cl_uint* bound,
// data: mes
Expand Down Expand Up @@ -238,16 +242,16 @@ __kernel void BlockMiningStep2(
((uint8_t *)r)[34] = ((uint8_t *)r)[2];
((uint8_t *)r)[35] = ((uint8_t *)r)[3];


#pragma unroll
for (int k = 0; k < K_LEN; k += 4)
{
ind[k] = r[k >> 2] & N_MASK;
ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) & N_MASK;
ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) & N_MASK;
ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) & N_MASK;
ind[k] = r[k >> 2] % N_MASK;
ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) % N_MASK;
ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) % N_MASK;
ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) % N_MASK;
}


//================================================================//
// Calculate result
//================================================================//
Expand Down Expand Up @@ -349,6 +353,7 @@ __kernel void BlockMiningStep2(
fn_Add(r[8], 0, CV, r[8], CV);
}



//--------------------hash(f)--------------------
//====================================================================//
Expand Down Expand Up @@ -398,7 +403,8 @@ __kernel void BlockMiningStep2(
//================================================================//
j = ((cl_ulong*)r)[3] < ((cl_ulong global*)bound)[3] || ((cl_ulong*)r)[3] == ((cl_ulong global*)bound)[3] && (((cl_ulong*)r)[2] < ((cl_ulong global*)bound)[2] || ((cl_ulong*)r)[2] == ((cl_ulong global*)bound)[2] && (((cl_ulong*)r)[1] < ((cl_ulong global*)bound)[1] || ((cl_ulong*)r)[1] == ((cl_ulong global*)bound)[1] && ((cl_ulong*)r)[0] < ((cl_ulong global*)bound)[0]));

if (j)//

if (j )//
{
cl_uint oldC = atomic_inc(vCount);

Expand Down
24 changes: 8 additions & 16 deletions Ubuntu/OCLdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,14 @@ typedef unsigned __int32 cl_uint;
#define K_LEN 32

// N: number of precalculated hashes
#define N_LEN 0x4000000// must not be less than 64 (BLOCK_DIM); 0x4000000 == 2^26
// Parameters for a variable number of precalculated hashes.
// NOTE(review): presumably INIT_N_LEN is the starting value of N, MAX_N_LEN its
// upper bound, and the Increase* values are block heights that control when and
// how often N grows -- confirm against the host-side code that computes N.
#define INIT_N_LEN 0x4000000 // 2^26, same value as the fixed N_LEN above
#define MAX_N_LEN 0x7FC9FF98
#define IncreaseStart (600*1024) // 614400
#define IncreaseEnd (4198400) // == 4100*1024
#define IncreasePeriodForN (50*1024) // 51200



// Parenthesized so the sum stays a single operand wherever the macro expands
// (e.g. `Sol_Index * 2` or `x - Sol_Index` would otherwise mis-associate).
#define Sol_Index (0x3381BF + 10)
////////////////////////////////////////////////////////////////////////////////
// PARAMETERS: Heuristic prehash kernel parameters
Expand Down Expand Up @@ -206,17 +213,6 @@ struct ctx_t;
* BLOCK_DIM \
)

// Necessary workspace size: the larger of
//   (2 * N_LEN + 1) * INDEX_SIZE_8
// and
//   NONCES_PER_ITER * (NUM_SIZE_8 + 2 * INDEX_SIZE_8) + INDEX_SIZE_8.
// The (cl_uint) cast applies to ((N_LEN << 1) + 1) only, before the multiply.
#define WORKSPACE_SIZE_8 \
( \
( \
(cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8 \
> NONCES_PER_ITER * (NUM_SIZE_8 + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
)? \
(cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8: \
NONCES_PER_ITER * (NUM_SIZE_8 + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
)

//============================================================================//
// GPU shared memory
//============================================================================//
Expand All @@ -242,11 +238,7 @@ struct ctx_t;
#define ROUND_NC_SIZE_32 (NC_SIZE_32_BLOCK * BLOCK_DIM)

//============================================================================//
// Heuristic CUDA parameters
//============================================================================//
// mod 2^26 mask: `x & N_MASK` equals `x % N_LEN` only while N_LEN is a power of two
#define N_MASK (N_LEN - 1)

// number of threads per iteration
#define THREADS_PER_ITER (NONCES_PER_ITER / NONCES_PER_THREAD)

Expand Down
4 changes: 2 additions & 2 deletions Ubuntu/PreHashKernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
__kernel void InitPrehash(
// data: height
const cl_uint h,
const cl_uint n_len,
// hashes
global cl_uint * hashes
)
Expand All @@ -100,7 +101,7 @@ __kernel void InitPrehash(

tid = get_global_id(0);

if (tid < N_LEN)
if (tid < n_len)
{
ulong h[8];
ulong b[16];
Expand Down Expand Up @@ -164,7 +165,6 @@ __kernel void InitPrehash(
//====================================================================//
#pragma unroll
for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210);

((__global uchar *)hashes)[tid * 32 + 31] = 0;

}
Expand Down
7 changes: 6 additions & 1 deletion clMining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,16 @@ void MiningClass::hBlockMining(
// indices of valid solutions
cl_mem valid,
cl_mem vCount,
cl_mem BHashes
uint64_t N_LEN ,
cl_mem BHashes

)
{
cl_kernel kernelStep1 = program->getKernel("BlockMiningStep1");
int id = 0;
cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &mes));
cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_ulong), &base));
cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_uint), &N_LEN));
cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &hashes));
cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &BHashes));

Expand All @@ -86,8 +89,10 @@ void MiningClass::hBlockMining(
cl->checkError(err);

//--------------------------------

cl_kernel kernelStep2 = program->getKernel("BlockMiningStep2");
id = 0;
cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_uint), &N_LEN));
cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_mem), &bound));
cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_mem), &mes));
cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_ulong), &base));
Expand Down
3 changes: 2 additions & 1 deletion clMining.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ class MiningClass
// indices of valid solutions
cl_mem valid,
cl_mem vCount,
cl_mem BHashes = NULL
uint64_t N_LEN,
cl_mem BHashes = NULL
);
};

4 changes: 3 additions & 1 deletion clPreHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ PreHashClass::~PreHashClass()
}

int PreHashClass::Prehash(
uint64_t N_LEN,
// data: height
cl_uint height,
// hashes
Expand All @@ -25,7 +26,8 @@ int PreHashClass::Prehash(
cl_kernel kernel = program->getKernel("InitPrehash");

cl->checkError(clSetKernelArg(kernel, 0, sizeof(cl_uint), &height));
cl->checkError(clSetKernelArg(kernel, 1, sizeof(cl_mem), &hashes));
cl->checkError(clSetKernelArg(kernel, 1, sizeof(cl_uint), &N_LEN));
cl->checkError(clSetKernelArg(kernel, 2, sizeof(cl_mem), &hashes));



Expand Down
Loading

0 comments on commit 73f86a4

Please sign in to comment.