Add calcN

mhssamadani · Nov 8, 2021 · commit 73f86a4
1 parent: d46e316
Show file tree

Hide file tree

Showing 22 changed files with 158 additions and 108 deletions.
diff --git a/ErgoOpenCL.vcxproj b/ErgoOpenCL.vcxproj
@@ -70,10 +70,10 @@
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
+    <IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
+    <IncludePath>C:\boost_1_55_0\boost_1_55_0;C:\OpenSSL-Win64\include;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\include;$(IncludePath)</IncludePath>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
@@ -89,11 +89,11 @@
       <Optimization>Disabled</Optimization>
       <SDLCheck>false</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
-      <AdditionalIncludeDirectories>C:\boost_1_55_0\boost_1_55_0;E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>C:\boost_1_55_0\boost_1_55_0;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ClCompile>
     <Link>
-      <AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
       <AdditionalDependencies>OpenCL.lib;libcurl.lib;libeay32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
@@ -120,13 +120,13 @@
       <SDLCheck>false</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
       <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>E:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl;C:\OpenSSL-Win64\include;C:\Program Files %28x86%29\AMD APP SDK\3.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
       <AdditionalDependencies>OpenCL.lib;libcurl.lib;libeay32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;F:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>C:\OpenSSL-Win64\lib;D:\Ergo\libcurl-vc-x64-release-dll-ipv6-sspi-winssl\lib;C:\Program Files %28x86%29\AMD APP SDK\3.0\lib\x86_64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>
   <ItemGroup>

diff --git a/Hiveos/ergoopenclV2-2.0.tar.gz b/Hiveos/ergoopenclV2-2.0.tar.gz
diff --git a/Hiveos/ergoopenclV2-2.1.tar.gz b/Hiveos/ergoopenclV2-2.1.tar.gz
diff --git a/MiningKernel.cl b/MiningKernel.cl
@@ -22,6 +22,8 @@ const  __constant cl_ulong ivals[8] = {
 
 
 __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
+				const cl_uint  n_len,
+
 	// precalculated hashes
 	global const cl_uint* hashes,
 	// intermediate Hashes
@@ -111,15 +113,16 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
 			//((uint8_t*)&h2)[5] = ((uint8_t*)r)[26];
 			//((uint8_t*)&h2)[6] = ((uint8_t*)r)[25];
 			//((uint8_t*)&h2)[7] = ((uint8_t*)r)[24];
+			h3 = h2 % n_len;
 
-			h3 = h2 % N_LEN;
-			//--------------------------read hash from lookup
+		//--------------------------read hash from lookup
 			cl_uint tmpL;
 #pragma unroll 8
-			for (int i = 0; i < 8; ++i)
+			for (int i = 0; i < 32; ++i)
 			{
-				tmpL = hashes[(h3 << 3) + i];
-				reverseBytesInt(tmpL, r[7 - i]);
+				//tmpL = hashes[(h3 << 3) + i];
+				//reverseBytesInt(tmpL, r[7 - i]);
+				((uint8_t *)r)[31-i] = ((global uint8_t *)hashes)[h3 * 32 + i];
 			}
 			//------------------------------------------------------
 
@@ -180,6 +183,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
 }
 
 __kernel  void BlockMiningStep2(
+	const cl_uint N_MASK,
 	// boundary for puzzle
 	global const cl_uint* bound,
 	// data:  mes  
@@ -238,16 +242,16 @@ __kernel  void BlockMiningStep2(
 		((uint8_t *)r)[34] = ((uint8_t *)r)[2];
 		((uint8_t *)r)[35] = ((uint8_t *)r)[3];
 
+
 #pragma unroll
 		for (int k = 0; k < K_LEN; k += 4)
 		{
-			ind[k] = r[k >> 2] & N_MASK;
-			ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) & N_MASK;
-			ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) & N_MASK;
-			ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) & N_MASK;
+			ind[k] = r[k >> 2] % N_MASK;
+			ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) % N_MASK;
+			ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) % N_MASK;
+			ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) % N_MASK;
 		}
 
-
 		//================================================================//
 		//  Calculate result
 		//================================================================//
@@ -349,6 +353,7 @@ __kernel  void BlockMiningStep2(
 			fn_Add(r[8], 0, CV, r[8], CV);
 		}
 
+
 
 		//--------------------hash(f)--------------------
 		//====================================================================//
@@ -398,7 +403,8 @@ __kernel  void BlockMiningStep2(
 		//================================================================//
 		j = ((cl_ulong*)r)[3] < ((cl_ulong global*)bound)[3] || ((cl_ulong*)r)[3] == ((cl_ulong global*)bound)[3] && (((cl_ulong*)r)[2] < ((cl_ulong global*)bound)[2] || ((cl_ulong*)r)[2] == ((cl_ulong global*)bound)[2] && (((cl_ulong*)r)[1] < ((cl_ulong global*)bound)[1] || ((cl_ulong*)r)[1] == ((cl_ulong global*)bound)[1] && ((cl_ulong*)r)[0] < ((cl_ulong global*)bound)[0]));
 
-		if (j)//
+
+		if (j )//
 		{
 			cl_uint oldC = atomic_inc(vCount);
 

diff --git a/OCLdefs.h b/OCLdefs.h
@@ -46,7 +46,14 @@ typedef unsigned __int32        cl_uint;
 #define K_LEN              32
 
 // N: number of precalculated hashes
-#define N_LEN             0x4000000// kamtar az 64(BLOK_DIM) nabashad
+#define INIT_N_LEN 0x4000000
+#define MAX_N_LEN  0x7FC9FF98
+#define IncreaseStart (600*1024)
+#define IncreaseEnd (4198400)
+#define IncreasePeriodForN (50*1024)
+
+
+
 #define Sol_Index 0x3381BF + 10
 ////////////////////////////////////////////////////////////////////////////////
 //  PARAMETERS: Heuristic prehash kernel parameters
@@ -206,17 +213,6 @@ struct ctx_t;
     * BLOCK_DIM                                                                \
 )
 
-// necessary workspace size
-#define WORKSPACE_SIZE_8                                                       \
-(                                                                              \
-    (                                                                          \
-        (cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8                            \
-        > NONCES_PER_ITER * (NUM_SIZE_8  + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
-    )?                                                                         \
-    (cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8:                               \
-    NONCES_PER_ITER * (NUM_SIZE_8  + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8       \
-)
-
 //============================================================================//
 //  GPU shared memory
 //============================================================================//
@@ -242,11 +238,7 @@ struct ctx_t;
 #define ROUND_NC_SIZE_32   (NC_SIZE_32_BLOCK * BLOCK_DIM)
 
 //============================================================================//
-//  Heuristic CUDA parameters
 //============================================================================//
-// mod 2^26 mask
-#define N_MASK             (N_LEN - 1)
-
 // number of threads per iteration
 #define THREADS_PER_ITER   (NONCES_PER_ITER / NONCES_PER_THREAD)
 

diff --git a/PreHashKernel.cl b/PreHashKernel.cl
@@ -89,6 +89,7 @@ inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
 __kernel void InitPrehash(
 	// data: height
 	const cl_uint  h,
+	const cl_uint  n_len,
 	// hashes
 	global cl_uint * hashes
 )
@@ -100,7 +101,7 @@ __kernel void InitPrehash(
 
 	tid = get_global_id(0);
 
-	if (tid < N_LEN)
+	if (tid < n_len)
 	{
 		ulong h[8];
 		ulong b[16];
@@ -164,7 +165,6 @@ __kernel void InitPrehash(
 		//====================================================================//
 #pragma unroll
 		for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210);
-
 		((__global uchar *)hashes)[tid * 32 + 31] = 0;
 
 	}

diff --git a/Ubuntu/MiningKernel.cl b/Ubuntu/MiningKernel.cl
@@ -22,6 +22,8 @@ const  __constant cl_ulong ivals[8] = {
 
 
 __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
+				const cl_uint  n_len,
+
 	// precalculated hashes
 	global const cl_uint* hashes,
 	// intermediate Hashes
@@ -111,15 +113,16 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
 			//((uint8_t*)&h2)[5] = ((uint8_t*)r)[26];
 			//((uint8_t*)&h2)[6] = ((uint8_t*)r)[25];
 			//((uint8_t*)&h2)[7] = ((uint8_t*)r)[24];
+			h3 = h2 % n_len;
 
-			h3 = h2 % N_LEN;
-			//--------------------------read hash from lookup
+		//--------------------------read hash from lookup
 			cl_uint tmpL;
 #pragma unroll 8
-			for (int i = 0; i < 8; ++i)
+			for (int i = 0; i < 32; ++i)
 			{
-				tmpL = hashes[(h3 << 3) + i];
-				reverseBytesInt(tmpL, r[7 - i]);
+				//tmpL = hashes[(h3 << 3) + i];
+				//reverseBytesInt(tmpL, r[7 - i]);
+				((uint8_t *)r)[31-i] = ((global uint8_t *)hashes)[h3 * 32 + i];
 			}
 			//------------------------------------------------------
 
@@ -180,6 +183,7 @@ __kernel void BlockMiningStep1(global const cl_uint *data, const cl_ulong base,
 }
 
 __kernel  void BlockMiningStep2(
+	const cl_uint N_MASK,
 	// boundary for puzzle
 	global const cl_uint* bound,
 	// data:  mes  
@@ -238,16 +242,16 @@ __kernel  void BlockMiningStep2(
 		((uint8_t *)r)[34] = ((uint8_t *)r)[2];
 		((uint8_t *)r)[35] = ((uint8_t *)r)[3];
 
+
 #pragma unroll
 		for (int k = 0; k < K_LEN; k += 4)
 		{
-			ind[k] = r[k >> 2] & N_MASK;
-			ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) & N_MASK;
-			ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) & N_MASK;
-			ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) & N_MASK;
+			ind[k] = r[k >> 2] % N_MASK;
+			ind[k + 1] = ((r[k >> 2] << 8) | (r[(k >> 2) + 1] >> 24)) % N_MASK;
+			ind[k + 2] = ((r[k >> 2] << 16) | (r[(k >> 2) + 1] >> 16)) % N_MASK;
+			ind[k + 3] = ((r[k >> 2] << 24) | (r[(k >> 2) + 1] >> 8)) % N_MASK;
 		}
 
-
 		//================================================================//
 		//  Calculate result
 		//================================================================//
@@ -349,6 +353,7 @@ __kernel  void BlockMiningStep2(
 			fn_Add(r[8], 0, CV, r[8], CV);
 		}
 
+
 
 		//--------------------hash(f)--------------------
 		//====================================================================//
@@ -398,7 +403,8 @@ __kernel  void BlockMiningStep2(
 		//================================================================//
 		j = ((cl_ulong*)r)[3] < ((cl_ulong global*)bound)[3] || ((cl_ulong*)r)[3] == ((cl_ulong global*)bound)[3] && (((cl_ulong*)r)[2] < ((cl_ulong global*)bound)[2] || ((cl_ulong*)r)[2] == ((cl_ulong global*)bound)[2] && (((cl_ulong*)r)[1] < ((cl_ulong global*)bound)[1] || ((cl_ulong*)r)[1] == ((cl_ulong global*)bound)[1] && ((cl_ulong*)r)[0] < ((cl_ulong global*)bound)[0]));
 
-		if (j)//
+
+		if (j )//
 		{
 			cl_uint oldC = atomic_inc(vCount);
 

diff --git a/Ubuntu/OCLdefs.h b/Ubuntu/OCLdefs.h
@@ -46,7 +46,14 @@ typedef unsigned __int32        cl_uint;
 #define K_LEN              32
 
 // N: number of precalculated hashes
-#define N_LEN             0x4000000// kamtar az 64(BLOK_DIM) nabashad
+#define INIT_N_LEN 0x4000000
+#define MAX_N_LEN  0x7FC9FF98
+#define IncreaseStart (600*1024)
+#define IncreaseEnd (4198400)
+#define IncreasePeriodForN (50*1024)
+
+
+
 #define Sol_Index 0x3381BF + 10
 ////////////////////////////////////////////////////////////////////////////////
 //  PARAMETERS: Heuristic prehash kernel parameters
@@ -206,17 +213,6 @@ struct ctx_t;
     * BLOCK_DIM                                                                \
 )
 
-// necessary workspace size
-#define WORKSPACE_SIZE_8                                                       \
-(                                                                              \
-    (                                                                          \
-        (cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8                            \
-        > NONCES_PER_ITER * (NUM_SIZE_8  + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8 \
-    )?                                                                         \
-    (cl_uint)((N_LEN << 1) + 1) * INDEX_SIZE_8:                               \
-    NONCES_PER_ITER * (NUM_SIZE_8  + (INDEX_SIZE_8 << 1)) + INDEX_SIZE_8       \
-)
-
 //============================================================================//
 //  GPU shared memory
 //============================================================================//
@@ -242,11 +238,7 @@ struct ctx_t;
 #define ROUND_NC_SIZE_32   (NC_SIZE_32_BLOCK * BLOCK_DIM)
 
 //============================================================================//
-//  Heuristic CUDA parameters
 //============================================================================//
-// mod 2^26 mask
-#define N_MASK             (N_LEN - 1)
-
 // number of threads per iteration
 #define THREADS_PER_ITER   (NONCES_PER_ITER / NONCES_PER_THREAD)
 

diff --git a/Ubuntu/PreHashKernel.cl b/Ubuntu/PreHashKernel.cl
@@ -89,6 +89,7 @@ inline void BlakeCompress(ulong *h, const ulong *m, ulong t, ulong f)
 __kernel void InitPrehash(
 	// data: height
 	const cl_uint  h,
+	const cl_uint  n_len,
 	// hashes
 	global cl_uint * hashes
 )
@@ -100,7 +101,7 @@ __kernel void InitPrehash(
 
 	tid = get_global_id(0);
 
-	if (tid < N_LEN)
+	if (tid < n_len)
 	{
 		ulong h[8];
 		ulong b[16];
@@ -164,7 +165,6 @@ __kernel void InitPrehash(
 		//====================================================================//
 #pragma unroll
 		for (int i = 0; i < 4; ++i) ((__global ulong *)hashes)[(tid + 1) * 4 - i - 1] = as_ulong(as_uchar8(h[i]).s76543210);
-
 		((__global uchar *)hashes)[tid * 32 + 31] = 0;
 
 	}

diff --git a/clMining.cpp b/clMining.cpp
@@ -66,13 +66,16 @@ void MiningClass::hBlockMining(
 	// indices of valid solutions
 	cl_mem valid,
 	cl_mem vCount,
-	cl_mem BHashes
+	uint64_t N_LEN , 
+	cl_mem BHashes 
+
 	)
 {
 	cl_kernel kernelStep1 = program->getKernel("BlockMiningStep1");
 	int id = 0;
 	cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &mes));
 	cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_ulong), &base));
+	cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_uint), &N_LEN));
 	cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &hashes));
 	cl->checkError(clSetKernelArg(kernelStep1, id++, sizeof(cl_mem), &BHashes));
 
@@ -86,8 +89,10 @@ void MiningClass::hBlockMining(
 	cl->checkError(err);
 
 	//--------------------------------
+
 	cl_kernel kernelStep2 = program->getKernel("BlockMiningStep2");
 	id = 0;
+	cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_uint), &N_LEN));
 	cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_mem), &bound));
 	cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_mem), &mes));
 	cl->checkError(clSetKernelArg(kernelStep2, id++, sizeof(cl_ulong), &base));

diff --git a/clMining.h b/clMining.h
@@ -47,7 +47,8 @@ class MiningClass
 		// indices of valid solutions
 		cl_mem valid,
 		cl_mem vCount,
-		cl_mem BHashes = NULL
+		uint64_t N_LEN,
+		cl_mem BHashes = NULL 
 	);
 };
 
diff --git a/clPreHash.cpp b/clPreHash.cpp
@@ -13,6 +13,7 @@ PreHashClass::~PreHashClass()
 }
 
 int PreHashClass::Prehash(
+	uint64_t N_LEN,
 	// data: height
 	cl_uint  height,
 	// hashes
@@ -25,7 +26,8 @@ int PreHashClass::Prehash(
 	cl_kernel kernel = program->getKernel("InitPrehash");
 
 	cl->checkError(clSetKernelArg(kernel, 0, sizeof(cl_uint), &height));
-	cl->checkError(clSetKernelArg(kernel, 1, sizeof(cl_mem), &hashes));
+	cl->checkError(clSetKernelArg(kernel, 1, sizeof(cl_uint), &N_LEN));
+	cl->checkError(clSetKernelArg(kernel, 2, sizeof(cl_mem), &hashes));