Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for staking code and thread shutdown #505

Closed
wants to merge 10 commits into from
87 changes: 42 additions & 45 deletions src/komodo_bitcoind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2247,35 +2247,36 @@ int64_t komodo_coinsupply(int64_t *zfundsp,int64_t *sproutfundsp,int32_t height)
return(supply);
}

struct komodo_staking *komodo_addutxo(struct komodo_staking *array,int32_t *numkp,int32_t *maxkp,uint32_t txtime,uint64_t nValue,uint256 txid,int32_t vout,char *address,uint8_t *hashbuf,CScript pk)
void komodo_addutxo(std::vector<komodo_staking> &array,uint32_t txtime,uint64_t nValue,uint256 txid,int32_t vout,char *address,uint8_t *hashbuf,CScript pk)
{
uint256 hash; uint32_t segid32; struct komodo_staking *kp;
uint256 hash; uint32_t segid32; komodo_staking kp;
segid32 = komodo_stakehash(&hash,address,hashbuf,txid,vout);
if ( *numkp >= *maxkp )
{
*maxkp += 1000;
array = (struct komodo_staking *)realloc(array,sizeof(*array) * (*maxkp));
//fprintf(stderr,"realloc max.%d array.%p\n",*maxkp,array);
}
kp = &array[(*numkp)++];
//fprintf(stderr,"kp.%p num.%d\n",kp,*numkp);
memset(kp,0,sizeof(*kp));
strcpy(kp->address,address);
kp->txid = txid;
kp->vout = vout;
kp->hashval = UintToArith256(hash);
kp->txtime = txtime;
kp->segid32 = segid32;
kp->nValue = nValue;
kp->scriptPubKey = pk;
return(array);
if ( array.size() >= array.capacity() )
{
array.reserve(array.capacity() + 1000);
//fprintf(stderr,"%s realloc array.size().%d array.capacity().%d\n", __func__, array.size(), array.capacity());
}
//memset(&kp,0,sizeof(kp));
strcpy(kp.address, address);
kp.txid = txid;
kp.vout = vout;
kp.hashval = UintToArith256(hash);
kp.txtime = txtime;
kp.segid32 = segid32;
kp.nValue = nValue;
kp.scriptPubKey = pk;
array.push_back(kp);
//fprintf(stderr,"kp.%p array.size().%d\n",kp,array.size());
}

int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blocktimep,uint32_t *txtimep,uint256 *utxotxidp,int32_t *utxovoutp,uint64_t *utxovaluep,uint8_t *utxosig, uint256 merkleroot)
{
static struct komodo_staking *array; static int32_t numkp,maxkp; static uint32_t lasttime;
// use thread_local to prevent crash in case of accidental thread overlapping
thread_local std::vector<komodo_staking> array;
thread_local uint32_t lasttime;

int32_t PoSperc = 0, newStakerActive;
std::set<CBitcoinAddress> setAddress; struct komodo_staking *kp; int32_t winners,segid,minage,nHeight,counter=0,i,m,siglen=0,nMinDepth = 1,nMaxDepth = 99999999; std::vector<COutput> vecOutputs; uint32_t block_from_future_rejecttime,besttime,eligible,earliest = 0; CScript best_scriptPubKey; arith_uint256 mindiff,ratio,bnTarget,tmpTarget; CBlockIndex *tipindex,*pindex; CTxDestination address; bool fNegative,fOverflow; uint8_t hashbuf[256]; CTransaction tx; uint256 hashBlock;
std::set<CBitcoinAddress> setAddress; int32_t winners,segid,minage,nHeight,counter=0,i,m,siglen=0,nMinDepth = 1,nMaxDepth = 99999999; std::vector<COutput> vecOutputs; uint32_t block_from_future_rejecttime,besttime,eligible,earliest = 0; CScript best_scriptPubKey; arith_uint256 mindiff,ratio,bnTarget,tmpTarget; CBlockIndex *tipindex,*pindex; CTxDestination address; bool fNegative,fOverflow; uint8_t hashbuf[256]; CTransaction tx; uint256 hashBlock;
uint64_t cbPerc = *utxovaluep, tocoinbase = 0;
if (!EnsureWalletIsAvailable(0))
return 0;
Expand All @@ -2297,7 +2298,7 @@ int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blockt
// this was for VerusHash PoS64
//tmpTarget = komodo_PoWtarget(&PoSperc,bnTarget,nHeight,ASSETCHAINS_STAKED);
bool resetstaker = false;
if ( array != 0 )
if ( array.size() != 0 )
{
LOCK(cs_main);
CBlockIndex* pblockindex = chainActive[tipindex->nHeight];
Expand All @@ -2309,15 +2310,13 @@ int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blockt
}
}

if ( resetstaker || array == 0 || time(NULL) > lasttime+600 )
if ( resetstaker || array.size() == 0 || time(NULL) > lasttime+600 )
{
LOCK2(cs_main, pwalletMain->cs_wallet);
pwalletMain->AvailableCoins(vecOutputs, false, NULL, true);
if ( array != 0 )
if ( array.size() != 0 )
{
free(array);
array = 0;
maxkp = numkp = 0;
array.clear();
lasttime = 0;
}
BOOST_FOREACH(const COutput& out, vecOutputs)
Expand All @@ -2343,16 +2342,16 @@ int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blockt
continue;
if ( myGetTransaction(out.tx->GetHash(),tx,hashBlock) != 0 && (pindex= komodo_getblockindex(hashBlock)) != 0 )
{
array = komodo_addutxo(array,&numkp,&maxkp,(uint32_t)pindex->nTime,(uint64_t)nValue,out.tx->GetHash(),out.i,(char *)CBitcoinAddress(address).ToString().c_str(),hashbuf,(CScript)pk);
//fprintf(stderr,"addutxo numkp.%d vs max.%d\n",numkp,maxkp);
komodo_addutxo(array,(uint32_t)pindex->nTime,(uint64_t)nValue,out.tx->GetHash(),out.i,(char *)CBitcoinAddress(address).ToString().c_str(),hashbuf,(CScript)pk);
//fprintf(stderr,"%s array.size().%d vs array.capacity().%d\n", __func__,array.size(),array.capacity());
}
}
}
lasttime = (uint32_t)time(NULL);
//fprintf(stderr,"finished kp data of utxo for staking %u ht.%d numkp.%d maxkp.%d\n",(uint32_t)time(NULL),nHeight,numkp,maxkp);
//fprintf(stderr,"%s finished kp data of utxo for staking %u ht.%d array.size().%d array.capacity().%d\n", __func__,(uint32_t)time(NULL),nHeight,array.size(),array.capacity());
}
block_from_future_rejecttime = (uint32_t)GetTime() + ASSETCHAINS_STAKED_BLOCK_FUTURE_MAX;
for (i=winners=0; i<numkp; i++)
for (i=winners=0; i<array.size(); i++)
{
if ( fRequestShutdown || !GetBoolArg("-gen",false) )
return(0);
Expand All @@ -2361,23 +2360,23 @@ int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blockt
fprintf(stderr,"[%s:%d] chain tip changed during staking loop t.%u counter.%d\n",ASSETCHAINS_SYMBOL,nHeight,(uint32_t)time(NULL),i);
return(0);
}
kp = &array[i];
eligible = komodo_stake(0,bnTarget,nHeight,kp->txid,kp->vout,0,(uint32_t)tipindex->nTime+ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF,kp->address,PoSperc);
komodo_staking &kp = array[i];
eligible = komodo_stake(0,bnTarget,nHeight,kp.txid,kp.vout,0,(uint32_t)tipindex->nTime+ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF,kp.address,PoSperc);
if ( eligible > 0 )
{
besttime = 0;
if ( eligible == komodo_stake(1,bnTarget,nHeight,kp->txid,kp->vout,eligible,(uint32_t)tipindex->nTime+ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF,kp->address,PoSperc) )
if ( eligible == komodo_stake(1,bnTarget,nHeight,kp.txid,kp.vout,eligible,(uint32_t)tipindex->nTime+ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF,kp.address,PoSperc) )
{
// have elegible utxo to stake with.
if ( earliest == 0 || eligible < earliest || (eligible == earliest && (*utxovaluep == 0 || kp->nValue < *utxovaluep)) )
if ( earliest == 0 || eligible < earliest || (eligible == earliest && (*utxovaluep == 0 || kp.nValue < *utxovaluep)) )
{
// is better than the previous best, so use it instead.
earliest = eligible;
best_scriptPubKey = kp->scriptPubKey;
*utxovaluep = (uint64_t)kp->nValue;
decode_hex((uint8_t *)utxotxidp,32,(char *)kp->txid.GetHex().c_str());
*utxovoutp = kp->vout;
*txtimep = kp->txtime;
best_scriptPubKey = kp.scriptPubKey;
*utxovaluep = (uint64_t)kp.nValue;
decode_hex((uint8_t *)utxotxidp,32,(char *)kp.txid.GetHex().c_str());
*utxovoutp = kp.vout;
*txtimep = kp.txtime;
}
/*if ( eligible < block_from_future_rejecttime )
{
Expand All @@ -2388,11 +2387,9 @@ int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blockt
}
}
}
if ( numkp < 500 && array != 0 )
if ( array.size() < 500 && array.size() != 0 )
{
free(array);
array = 0;
maxkp = numkp = 0;
array.clear();
lasttime = 0;
}
if ( earliest != 0 )
Expand Down
2 changes: 1 addition & 1 deletion src/komodo_bitcoind.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,6 @@ struct komodo_staking
CScript scriptPubKey;
};

struct komodo_staking *komodo_addutxo(struct komodo_staking *array,int32_t *numkp,int32_t *maxkp,uint32_t txtime,uint64_t nValue,uint256 txid,int32_t vout,char *address,uint8_t *hashbuf,CScript pk);
void komodo_addutxo(std::vector<komodo_staking> &array,uint32_t txtime,uint64_t nValue,uint256 txid,int32_t vout,char *address,uint8_t *hashbuf,CScript pk);

int32_t komodo_staked(CMutableTransaction &txNew,uint32_t nBits,uint32_t *blocktimep,uint32_t *txtimep,uint256 *utxotxidp,int32_t *utxovoutp,uint64_t *utxovaluep,uint8_t *utxosig, uint256 merkleroot);
30 changes: 19 additions & 11 deletions src/miner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ int32_t komodo_waituntilelegible(uint32_t blocktime, int32_t stakeHeight, uint32
int64_t adjustedtime = (int64_t)GetTime();
while ( (int64_t)blocktime-ASSETCHAINS_STAKED_BLOCK_FUTURE_MAX > adjustedtime )
{
boost::this_thread::interruption_point(); // allow to interrupt
int64_t secToElegible = (int64_t)blocktime-ASSETCHAINS_STAKED_BLOCK_FUTURE_MAX-adjustedtime;
if ( delay <= ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF && secToElegible <= ASSETCHAINS_STAKED_BLOCK_FUTURE_HALF )
break;
Expand All @@ -178,7 +179,8 @@ int32_t komodo_waituntilelegible(uint32_t blocktime, int32_t stakeHeight, uint32
}
if( !GetBoolArg("-gen",false) )
return(0);
sleep(1);
//sleep(1);
boost::this_thread::sleep_for(boost::chrono::seconds(1)); // allow to interrupt
adjustedtime = (int64_t)GetTime();
}
return(1);
Expand Down Expand Up @@ -268,7 +270,7 @@ CBlockTemplate* CreateNewBlock(CPubKey _pk,const CScript& _scriptPubKeyIn, int32
{
proposedTime = GetTime();
if (proposedTime == nMedianTimePast)
MilliSleep(10);
MilliSleep(10); // allow to interrupt
}
}
pblock->nTime = GetTime();
Expand Down Expand Up @@ -1145,7 +1147,7 @@ void waitForPeers(const CChainParams &chainparams)
do {
if (fvNodesEmpty)
{
MilliSleep(1000 + rand() % 4000);
MilliSleep(1000 + rand() % 4000); // allow to interrupt
boost::this_thread::interruption_point();
LOCK(cs_vNodes);
fvNodesEmpty = vNodes.empty();
Expand All @@ -1162,13 +1164,13 @@ void waitForPeers(const CChainParams &chainparams)
{
if (++loops <= 10)
{
MilliSleep(1000);
MilliSleep(1000); // allow to interrupt
}
else break;
}
}
} while (fvNodesEmpty || IsNotInSync());
MilliSleep(100 + rand() % 400);
MilliSleep(100 + rand() % 400); // allow to interrupt
}
}
}
Expand Down Expand Up @@ -1217,7 +1219,8 @@ void static BitcoinMiner()
uint8_t *script; uint64_t total; int32_t i,j,gpucount=KOMODO_MAXGPUCOUNT,notaryid = -1;
while ( (ASSETCHAIN_INIT == 0 || KOMODO_INITDONE == 0) )
{
sleep(1);
//sleep(1);
boost::this_thread::sleep_for(boost::chrono::seconds(1)); // allow to interrupt
if ( komodo_baseid(ASSETCHAINS_SYMBOL) < 0 )
break;
}
Expand Down Expand Up @@ -1264,7 +1267,7 @@ void static BitcoinMiner()
}
if (!fvNodesEmpty )//&& !IsInitialBlockDownload())
break;
MilliSleep(15000);
MilliSleep(15000); // allow to interrupt
//fprintf(stderr,"fvNodesEmpty %d IsInitialBlockDownload(%s) %d\n",(int32_t)fvNodesEmpty,ASSETCHAINS_SYMBOL,(int32_t)IsInitialBlockDownload());

} while (true);
Expand Down Expand Up @@ -1305,7 +1308,8 @@ void static BitcoinMiner()
static uint32_t counter;
if ( counter++ < 10 && ASSETCHAINS_STAKED == 0 )
fprintf(stderr,"created illegal blockB, retry\n");
sleep(1);
//sleep(1);
boost::this_thread::sleep_for(boost::chrono::seconds(1)); // allow to interrupt
continue;
}
//fprintf(stderr,"get template\n");
Expand All @@ -1330,7 +1334,8 @@ void static BitcoinMiner()
static uint32_t counter;
if ( counter++ < 10 )
fprintf(stderr,"skip generating %s on-demand block, no tx avail\n",ASSETCHAINS_SYMBOL);
sleep(10);
//sleep(10);
boost::this_thread::sleep_for(boost::chrono::seconds(10)); // allow to interrupt
continue;
} else fprintf(stderr,"%s vouts.%d mining.%d vs %d\n",ASSETCHAINS_SYMBOL,(int32_t)pblock->vtx[0].vout.size(),Mining_height,ASSETCHAINS_MINHEIGHT);
}
Expand Down Expand Up @@ -1524,7 +1529,8 @@ void static BitcoinMiner()
//fprintf(stderr,"need to wait %d seconds to submit block\n",(int32_t)(B.nTime - GetAdjustedTime()));
while ( GetTime() < B.nTime-2 )
{
sleep(1);
//sleep(1);
boost::this_thread::sleep_for(boost::chrono::seconds(1)); // allow to interrupt
if ( chainActive.LastTip()->nHeight >= Mining_height )
{
fprintf(stderr,"new block arrived\n");
Expand All @@ -1538,7 +1544,7 @@ void static BitcoinMiner()
{
int32_t r;
if ( (r= ((Mining_height + NOTARY_PUBKEY33[16]) % 64) / 8) > 0 )
MilliSleep((rand() % (r * 1000)) + 1000);
MilliSleep((rand() % (r * 1000)) + 1000); // allow to interrupt
}
}
else
Expand Down Expand Up @@ -1744,6 +1750,8 @@ void static BitcoinMiner()
if (minerThreads != NULL)
{
minerThreads->interrupt_all();
// std::cout << "Waiting for mining threads to stop..." << std::endl;
minerThreads->join_all(); // prevent thread overlapping

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the lack of interruption points, using both interrupt_all() and join_all() can (and "randomly" will) bring everything to a grinding halt when a thread is stuck between interruption points and therefore cannot be joined.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point about lack of interruption points.
Currently we do not have a join, which leads to improper thread shutdown.
And adding join_all works well in marmara and fixed a crash when a user called 'setgenerate false' and 'setgenerate true' quickly in a sequence.
But I missed an extra interruption_point (from the marmara code) in komodo_waituntilelegible, which runs a long loop; that interruption point would make thread shutdowns faster. Adding it.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and fixed a crash when a user called 'setgenerate false' and 'setgenerate true' quickly in a sequence.

This is actually where adding the join_all() creates the issue. Most specifically, it creates an issue with NN mining and CreateNewBlock (which MCL doesn't use).

Copy link
Collaborator Author

@dimxy dimxy Jul 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain what issue you mean? I believe joins may create an issue when a thread can never be joined and hangs because it may have a long loop without interruption points, sleep calls, or wait calls. If we have such loops, we need to fix them so that graceful thread shutdown works.
(I am going to run my dev NN node on this branch for testing)

Copy link

@TheComputerGenie TheComputerGenie Jul 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BitcoinMiner only contains a single interruption_point()
That point comes after the while loop at:

while ( GetTime() < B.nTime-2 )

which can hold up to 17.5 minutes, as set by:
pblock->nTime += (r % (33 - gpucount)*(33 - gpucount));

Ironically, the likelihood of that longest pause is increased by the new "stall reduction" code increasing the possibility for gpucount to reach 0 (assuming the rand hits 1056, which is possible).

Being async, the thought would be that this part would stick on its own and there would be no care; however, in reality, there are about a dozen circumstances where it locks everything (one of those circumstances being that threads are ignorant of each other and on advanced hardware come back with multiple solves when many miner threads are used).

When you look at what "should be" vs "what is", it shouldn't be a problem because NNs "should be" only running one thread; however, this is becoming decreasingly true with more and more NNs seeking to hit smaller and more predictable gaps.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good catch, this loop.
However, I believe that when a user calls setgenerate false and then setgenerate true without a join, there will be two running threads for some time, and this is basically not good at all (in a staking chain this could even cause a crash, as there is a static komodo_staking *array var that could be corrupted in this case). So I think threads should be stopped gracefully by joining.
The loop you mentioned has sleep() inside, and we can replace it with the boost::this_thread::sleep_for function (which allows the thread to be interrupted), and we should check the other remaining loops in miner.cpp to add interruption points in a similar way.

Copy link

@TheComputerGenie TheComputerGenie Jul 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and we can replace it with the boost::this_thread::sleep_for function (which allows the thread to be interrupted), and we should check the other remaining loops in miner.cpp to add interruption points in a similar way

However you want to do it, just want to make sure that you/everyone is aware that doing it as-is will lock up NNs; so, whatever way it's done to protect stakers that could crash needs to be done in such a way as to protect both.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure about your doubts though.
Deleting a thread object without join is an obvious bug IMO and should be fixed.
Fixing it may add some delay on daemon stopping or setgenerate false but this is not a lock-up if we do this properly.
Checking all loops in miner.cpp...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw this code does not work at all:

boost::this_thread::disable_interruption();
as 'disable_interruption' is a type, and to activate it we need to create a local variable of that type.
Maybe we should fix this too as it was intended

Copy link

@TheComputerGenie TheComputerGenie Jul 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haven't fully tested it yet, but (along with the others)

boost::this_thread::sleep_for(boost::chrono::seconds(1)); // allow to interrupt

does look like it'll solve the issue of my concern. ty

delete minerThreads;
minerThreads = NULL;
}
Expand Down