From ffc54e4a48e3e1e03cb9939a4c1e34b24f1a6e42 Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Mon, 6 Jan 2025 12:38:26 -0800 Subject: [PATCH] Implement parallel ledger close, off by default --- src/bucket/BucketListBase.cpp | 2 - src/bucket/BucketListSnapshotBase.cpp | 2 - src/bucket/BucketManager.cpp | 76 ++- src/bucket/BucketManager.h | 21 +- src/bucket/BucketSnapshotManager.cpp | 3 - src/bucket/LiveBucketList.h | 3 + src/bucket/SearchableBucketList.cpp | 2 - src/bucket/test/BucketListTests.cpp | 2 +- src/bucket/test/BucketManagerTests.cpp | 26 +- src/bucket/test/BucketTestUtils.cpp | 8 +- src/catchup/ApplyCheckpointWork.cpp | 8 +- src/catchup/ApplyLedgerWork.cpp | 3 +- src/catchup/CatchupManager.h | 11 +- src/catchup/CatchupManagerImpl.cpp | 204 +++++--- src/catchup/CatchupManagerImpl.h | 35 +- src/catchup/CatchupWork.cpp | 15 +- src/catchup/ReplayDebugMetaWork.cpp | 3 +- src/database/Database.cpp | 2 +- src/database/test/DatabaseTests.cpp | 13 +- src/herder/Herder.h | 5 +- src/herder/HerderImpl.cpp | 71 ++- src/herder/HerderImpl.h | 5 +- src/herder/HerderPersistence.h | 1 - src/herder/HerderPersistenceImpl.cpp | 76 +-- src/herder/HerderSCPDriver.cpp | 31 +- src/herder/HerderSCPDriver.h | 1 + src/herder/PendingEnvelopes.cpp | 4 +- src/herder/PendingEnvelopes.h | 4 + src/herder/TxSetFrame.cpp | 21 +- src/herder/Upgrades.cpp | 20 +- src/herder/test/HerderTests.cpp | 116 +++-- src/history/HistoryManager.h | 6 +- src/history/HistoryManagerImpl.cpp | 13 +- src/history/HistoryManagerImpl.h | 6 +- .../BucketListIsConsistentWithDatabase.cpp | 5 +- src/ledger/LedgerHeaderUtils.cpp | 38 +- src/ledger/LedgerHeaderUtils.h | 5 +- src/ledger/LedgerManager.h | 22 +- src/ledger/LedgerManagerImpl.cpp | 472 +++++++++++------- src/ledger/LedgerManagerImpl.h | 61 ++- src/ledger/test/InMemoryLedgerTxnRoot.cpp | 2 +- src/main/AppConnector.cpp | 21 +- src/main/AppConnector.h | 7 +- src/main/Application.cpp | 8 +- src/main/Application.h | 3 + src/main/ApplicationImpl.cpp | 85 +++- src/main/ApplicationImpl.h | 12 +- src/main/ApplicationUtils.cpp | 20 +- src/main/Maintainer.cpp | 4 +- src/main/test/ApplicationUtilsTests.cpp | 4 +- src/overlay/BanManagerImpl.cpp | 21 +- src/overlay/PeerManager.cpp | 24 +- src/overlay/test/OverlayManagerTests.cpp | 4 +- src/simulation/CoreTests.cpp | 3 +- src/simulation/LoadGenerator.cpp | 7 + src/simulation/TxGenerator.cpp | 2 +- src/test/FuzzerImpl.cpp | 10 +- src/test/TestUtils.cpp | 24 + src/test/TestUtils.h | 6 +- src/test/TxTests.cpp | 18 +- .../ExtendFootprintTTLOpFrame.cpp | 3 +- .../InvokeHostFunctionOpFrame.cpp | 3 +- src/transactions/OperationFrame.cpp | 21 +- src/transactions/OperationFrame.h | 1 + src/transactions/RestoreFootprintOpFrame.cpp | 3 +- src/transactions/TransactionFrame.cpp | 10 +- src/transactions/TransactionSQL.cpp | 9 +- src/transactions/test/SorobanTxTestUtils.cpp | 2 +- 68 files changed, 1136 insertions(+), 623 deletions(-) diff --git a/src/bucket/BucketListBase.cpp b/src/bucket/BucketListBase.cpp index 86daf45421..647e65d07a 100644 --- a/src/bucket/BucketListBase.cpp +++ b/src/bucket/BucketListBase.cpp @@ -57,7 +57,6 @@ template void BucketLevel::setNext(FutureBucket const& fb) { - releaseAssert(threadIsMain()); mNextCurr = fb; } @@ -79,7 +78,6 @@ template void BucketLevel::setCurr(std::shared_ptr b) { - releaseAssert(threadIsMain()); mNextCurr.clear(); mCurr = b; } diff --git a/src/bucket/BucketListSnapshotBase.cpp b/src/bucket/BucketListSnapshotBase.cpp index df4511b28b..cf2504fcfb 100644 --- a/src/bucket/BucketListSnapshotBase.cpp +++ 
b/src/bucket/BucketListSnapshotBase.cpp @@ -19,8 +19,6 @@ BucketListSnapshot::BucketListSnapshot( BucketListBase const& bl, LedgerHeader header) : mHeader(std::move(header)) { - releaseAssert(threadIsMain()); - for (uint32_t i = 0; i < BucketListBase::kNumLevels; ++i) { auto const& level = bl.getLevel(i); diff --git a/src/bucket/BucketManager.cpp b/src/bucket/BucketManager.cpp index dad3dcf7de..209d828caa 100644 --- a/src/bucket/BucketManager.cpp +++ b/src/bucket/BucketManager.cpp @@ -62,6 +62,7 @@ void BucketManager::initialize() { ZoneScoped; + releaseAssert(threadIsMain()); std::string d = mConfig.BUCKET_DIR_PATH; if (!fs::exists(d)) @@ -729,7 +730,7 @@ BucketManager::getBucketListReferencedBuckets() const } std::set -BucketManager::getAllReferencedBuckets() const +BucketManager::getAllReferencedBuckets(HistoryArchiveState const& has) const { ZoneScoped; auto referenced = getBucketListReferencedBuckets(); @@ -740,8 +741,7 @@ BucketManager::getAllReferencedBuckets() const // retain any bucket referenced by the last closed ledger as recorded in the // database (as merges complete, the bucket list drifts from that state) - auto lclHas = mApp.getLedgerManager().getLastClosedLedgerHAS(); - auto lclBuckets = lclHas.allBuckets(); + auto lclBuckets = has.allBuckets(); for (auto const& h : lclBuckets) { auto rit = referenced.emplace(hexToBin256(h)); @@ -752,39 +752,38 @@ BucketManager::getAllReferencedBuckets() const } // retain buckets that are referenced by a state in the publish queue. - auto pub = mApp.getHistoryManager().getBucketsReferencedByPublishQueue(); + for (auto const& h : + HistoryManager::getBucketsReferencedByPublishQueue(mApp.getConfig())) { - for (auto const& h : pub) + auto rhash = hexToBin256(h); + auto rit = referenced.emplace(rhash); + if (rit.second) { - auto rhash = hexToBin256(h); - auto rit = referenced.emplace(rhash); - if (rit.second) - { - CLOG_TRACE(Bucket, "{} referenced by publish queue", h); - - // Project referenced bucket `rhash` -- which might be a merge - // input captured before a merge finished -- through our weak - // map of merge input/output relationships, to find any outputs - // we'll want to retain in order to resynthesize the merge in - // the future, rather than re-run it. - mFinishedMerges.getOutputsUsingInput(rhash, referenced); - } + CLOG_TRACE(Bucket, "{} referenced by publish queue", h); + + // Project referenced bucket `rhash` -- which might be a merge + // input captured before a merge finished -- through our weak + // map of merge input/output relationships, to find any outputs + // we'll want to retain in order to resynthesize the merge in + // the future, rather than re-run it. 
+ mFinishedMerges.getOutputsUsingInput(rhash, referenced); } } return referenced; } void -BucketManager::cleanupStaleFiles() +BucketManager::cleanupStaleFiles(HistoryArchiveState const& has) { ZoneScoped; + releaseAssert(threadIsMain()); if (mConfig.DISABLE_BUCKET_GC) { return; } std::lock_guard lock(mBucketMutex); - auto referenced = getAllReferencedBuckets(); + auto referenced = getAllReferencedBuckets(has); std::transform(std::begin(mSharedLiveBuckets), std::end(mSharedLiveBuckets), std::inserter(referenced, std::end(referenced)), [](std::pair> const& p) { @@ -818,11 +817,11 @@ BucketManager::cleanupStaleFiles() } void -BucketManager::forgetUnreferencedBuckets() +BucketManager::forgetUnreferencedBuckets(HistoryArchiveState const& has) { ZoneScoped; std::lock_guard lock(mBucketMutex); - auto referenced = getAllReferencedBuckets(); + auto referenced = getAllReferencedBuckets(has); auto blReferenced = getBucketListReferencedBuckets(); auto bucketMapLoop = [&](auto& bucketMap, auto& futureMap) { @@ -867,7 +866,7 @@ BucketManager::forgetUnreferencedBuckets() Bucket, "BucketManager::forgetUnreferencedBuckets dropping {}", filename); - if (!filename.empty() && !mApp.getConfig().DISABLE_BUCKET_GC) + if (!filename.empty() && !mConfig.DISABLE_BUCKET_GC) { CLOG_TRACE(Bucket, "removing bucket file: {}", filename); std::filesystem::remove(filename); @@ -1049,7 +1048,8 @@ BucketManager::maybeSetIndex(std::shared_ptr b, void BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, - uint32_t ledgerVers) + uint32_t ledgerVers, + SorobanNetworkConfig const& cfg) { releaseAssert(mSnapshotManager); releaseAssert(!mEvictionFuture.valid()); @@ -1057,7 +1057,6 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, auto searchableBL = mSnapshotManager->copySearchableLiveBucketListSnapshot(); - auto const& cfg = mApp.getLedgerManager().getSorobanNetworkConfigForApply(); auto const& sas = cfg.stateArchivalSettings(); using task_t = std::packaged_task; @@ -1078,31 +1077,27 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, } EvictedStateVectors -BucketManager::resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx, - uint32_t ledgerSeq, - LedgerKeySet const& modifiedKeys, - uint32_t ledgerVers) +BucketManager::resolveBackgroundEvictionScan( + AbstractLedgerTxn& ltx, uint32_t ledgerSeq, + LedgerKeySet const& modifiedKeys, uint32_t ledgerVers, + SorobanNetworkConfig& networkConfig) { ZoneScoped; - releaseAssert(threadIsMain()); releaseAssert(mEvictionStatistics); if (!mEvictionFuture.valid()) { - startBackgroundEvictionScan(ledgerSeq, ledgerVers); + startBackgroundEvictionScan(ledgerSeq, ledgerVers, networkConfig); } auto evictionCandidates = mEvictionFuture.get(); - auto const& networkConfig = - mApp.getLedgerManager().getSorobanNetworkConfigForApply(); - // If eviction related settings changed during the ledger, we have to // restart the scan if (!evictionCandidates.isValid(ledgerSeq, networkConfig.stateArchivalSettings())) { - startBackgroundEvictionScan(ledgerSeq, ledgerVers); + startBackgroundEvictionScan(ledgerSeq, ledgerVers, networkConfig); evictionCandidates = mEvictionFuture.get(); } @@ -1229,6 +1224,7 @@ BucketManager::assumeState(HistoryArchiveState const& has, uint32_t maxProtocolVersion, bool restartMerges) { ZoneScoped; + releaseAssert(threadIsMain()); releaseAssertOrThrow(mConfig.MODE_ENABLES_BUCKETLIST); // TODO: Assume archival bucket state @@ -1277,7 +1273,7 @@ BucketManager::assumeState(HistoryArchiveState const& has, mLiveBucketList->restartMerges(mApp, 
maxProtocolVersion, has.currentLedger); } - cleanupStaleFiles(); + cleanupStaleFiles(has); } void @@ -1378,7 +1374,7 @@ std::shared_ptr BucketManager::mergeBuckets(HistoryArchiveState const& has) { ZoneScoped; - + releaseAssert(threadIsMain()); std::map ledgerMap = loadCompleteLedgerState(has); BucketMetadata meta; MergeCounters mc; @@ -1568,9 +1564,11 @@ BucketManager::visitLedgerEntries( } std::shared_ptr -BucketManager::scheduleVerifyReferencedBucketsWork() +BucketManager::scheduleVerifyReferencedBucketsWork( + HistoryArchiveState const& has) { - std::set hashes = getAllReferencedBuckets(); + releaseAssert(threadIsMain()); + std::set hashes = getAllReferencedBuckets(has); std::vector> seq; for (auto const& h : hashes) { diff --git a/src/bucket/BucketManager.h b/src/bucket/BucketManager.h index 9f9724de78..d17517838f 100644 --- a/src/bucket/BucketManager.h +++ b/src/bucket/BucketManager.h @@ -70,6 +70,11 @@ class BucketManager : NonMovableOrCopyable static std::string const kLockFilename; + // NB: ideally, BucketManager should have no access to mApp, as it's too + // dangerous in the context of parallel application. BucketManager is quite + // bloated, with lots of legacy code, so to ensure safety, annotate all + // functions using mApp with `releaseAssert(threadIsMain())` and avoid + // accessing mApp in the background. Application& mApp; std::unique_ptr mLiveBucketList; std::unique_ptr mHotArchiveBucketList; @@ -124,7 +129,7 @@ class BucketManager : NonMovableOrCopyable std::atomic mIsShutdown{false}; - void cleanupStaleFiles(); + void cleanupStaleFiles(HistoryArchiveState const& has); void deleteTmpDirAndUnlockBucketDir(); void deleteEntireBucketDir(); @@ -260,7 +265,7 @@ class BucketManager : NonMovableOrCopyable // not immediately cause the buckets to delete themselves, if someone else // is using them via a shared_ptr<>, but the BucketManager will no longer // independently keep them alive. - void forgetUnreferencedBuckets(); + void forgetUnreferencedBuckets(HistoryArchiveState const& has); // Feed a new batch of entries to the bucket list. This interface expects to // be given separate init (created) and live (updated) entry vectors. The @@ -290,7 +295,8 @@ class BucketManager : NonMovableOrCopyable // Scans BucketList for non-live entries to evict starting at the entry // pointed to by EvictionIterator. Evicts until `maxEntriesToEvict` entries // have been evicted or maxEvictionScanSize bytes have been scanned. - void startBackgroundEvictionScan(uint32_t ledgerSeq, uint32_t ledgerVers); + void startBackgroundEvictionScan(uint32_t ledgerSeq, uint32_t ledgerVers, + SorobanNetworkConfig const& cfg); // Returns a pair of vectors representing entries evicted this ledger, where // the first vector constains all deleted keys (TTL and temporary), and @@ -300,7 +306,8 @@ class BucketManager : NonMovableOrCopyable EvictedStateVectors resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx, uint32_t ledgerSeq, LedgerKeySet const& modifiedKeys, - uint32_t ledgerVers); + uint32_t ledgerVers, + SorobanNetworkConfig& networkConfig); medida::Meter& getBloomMissMeter() const; medida::Meter& getBloomLookupMeter() const; @@ -325,7 +332,8 @@ class BucketManager : NonMovableOrCopyable // Return the set of buckets referenced by the BucketList, LCL HAS, // and publish queue. 
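+ // (The HAS is passed in by the caller, typically the LCL HAS, so this + // method does not have to read it from LedgerManager via mApp; see the NB + // note above about keeping mApp access on the main thread.)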
- std::set getAllReferencedBuckets() const; + std::set + getAllReferencedBuckets(HistoryArchiveState const& has) const; // Check for missing bucket files that would prevent `assumeState` from // succeeding @@ -382,7 +390,8 @@ class BucketManager : NonMovableOrCopyable // Schedule a Work class that verifies the hashes of all referenced buckets // on background threads. - std::shared_ptr scheduleVerifyReferencedBucketsWork(); + std::shared_ptr + scheduleVerifyReferencedBucketsWork(HistoryArchiveState const& has); Config const& getConfig() const; diff --git a/src/bucket/BucketSnapshotManager.cpp b/src/bucket/BucketSnapshotManager.cpp index 0dffcc31ea..aaa85a3e44 100644 --- a/src/bucket/BucketSnapshotManager.cpp +++ b/src/bucket/BucketSnapshotManager.cpp @@ -98,7 +98,6 @@ BucketSnapshotManager::recordBulkLoadMetrics(std::string const& label, { // For now, only keep metrics for the main thread. We can decide on what // metrics make sense when more background services are added later. - releaseAssert(threadIsMain()); if (numEntries != 0) { @@ -153,8 +152,6 @@ BucketSnapshotManager::updateCurrentSnapshot( SnapshotPtrT&& liveSnapshot, SnapshotPtrT&& hotArchiveSnapshot) { - releaseAssert(threadIsMain()); - auto updateSnapshot = [numHistoricalSnapshots = mNumHistoricalSnapshots]( auto& currentSnapshot, auto& historicalSnapshots, auto&& newSnapshot) { diff --git a/src/bucket/LiveBucketList.h b/src/bucket/LiveBucketList.h index 0f2a6ac268..688f0acd22 100644 --- a/src/bucket/LiveBucketList.h +++ b/src/bucket/LiveBucketList.h @@ -9,6 +9,9 @@ namespace stellar { + +class SorobanNetworkConfig; + // The LiveBucketList stores the current canonical state of the ledger. It is // made up of LiveBucket buckets, which in turn store individual entries of type // BucketEntry. 
When an entry is "evicted" from the ledger, it is removed from diff --git a/src/bucket/SearchableBucketList.cpp b/src/bucket/SearchableBucketList.cpp index 60c66c31a4..d225a7c732 100644 --- a/src/bucket/SearchableBucketList.cpp +++ b/src/bucket/SearchableBucketList.cpp @@ -109,7 +109,6 @@ SearchableLiveBucketListSnapshot::loadPoolShareTrustLinesByAccountAndAsset( ZoneScoped; // This query should only be called during TX apply - releaseAssert(threadIsMain()); releaseAssert(mSnapshot); LedgerKeySet trustlinesToLoad; @@ -154,7 +153,6 @@ SearchableLiveBucketListSnapshot::loadInflationWinners(size_t maxWinners, // This is a legacy query, should only be called by main thread during // catchup - releaseAssert(threadIsMain()); auto timer = mSnapshotManager.recordBulkLoadMetrics("inflationWinners", 0) .TimeScope(); diff --git a/src/bucket/test/BucketListTests.cpp b/src/bucket/test/BucketListTests.cpp index 0a5b545097..5dfb6572f6 100644 --- a/src/bucket/test/BucketListTests.cpp +++ b/src/bucket/test/BucketListTests.cpp @@ -869,7 +869,7 @@ TEST_CASE_VERSIONS("network config snapshots BucketList size", "[bucketlist]") LedgerManagerForBucketTests& lm = app->getLedgerManager(); auto& networkConfig = - app->getLedgerManager().getSorobanNetworkConfigReadOnly(); + app->getLedgerManager().getMutableSorobanNetworkConfig(); uint32_t windowSize = networkConfig.stateArchivalSettings() .bucketListSizeWindowSampleSize; diff --git a/src/bucket/test/BucketManagerTests.cpp b/src/bucket/test/BucketManagerTests.cpp index 8eb393c789..5c22b3b997 100644 --- a/src/bucket/test/BucketManagerTests.cpp +++ b/src/bucket/test/BucketManagerTests.cpp @@ -237,7 +237,8 @@ TEST_CASE_VERSIONS("bucketmanager ownership", "[bucket][bucketmanager]") CHECK(fs::exists(indexFilename)); b.reset(); - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); CHECK(!fs::exists(filename)); CHECK(!fs::exists(indexFilename)); }; @@ -260,7 +261,8 @@ TEST_CASE_VERSIONS("bucketmanager ownership", "[bucket][bucketmanager]") // This shouldn't change if we forget unreferenced buckets since // it's referenced by bucketlist. - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); CHECK(b1.use_count() == 3); // But if we mutate the curr bucket of the bucketlist, it should. 
@@ -343,7 +345,8 @@ TEST_CASE_VERSIONS("bucketmanager reattach to finished merge", LedgerTestUtils::generateValidLedgerEntriesWithExclusions( {CONFIG_SETTING}, 10), {}); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } while (!LiveBucketList::levelShouldSpill(ledger, level - 1)); // Check that the merge on level isn't committed (we're in @@ -433,7 +436,8 @@ TEST_CASE_VERSIONS("bucketmanager reattach to running merge", {CONFIG_SETTING}, 100), {}); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); HistoryArchiveState has(ledger, bl, app->getConfig().NETWORK_PASSPHRASE); @@ -517,8 +521,10 @@ TEST_CASE("bucketmanager do not leak empty-merge futures", bl.resolveAnyReadyFutures(); std::this_thread::sleep_for(std::chrono::seconds(1)); } - bm.forgetUnreferencedBuckets(); - auto bmRefBuckets = bm.getAllReferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); + auto bmRefBuckets = bm.getAllReferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); auto bmDirBuckets = bm.getBucketHashesInBucketDirForTesting(); // Remove the 0 bucket in case it's "referenced"; it's never a file. @@ -574,16 +580,18 @@ TEST_CASE_VERSIONS( {CONFIG_SETTING}, 100), {}); clock.crank(false); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } // We should have published nothing and have the first // checkpoint still queued. REQUIRE(hm.getPublishSuccessCount() == 0); - REQUIRE(hm.getMinLedgerQueuedToPublish() == 7); + REQUIRE(HistoryManager::getMinLedgerQueuedToPublish(app->getConfig()) == + 7); auto oldReattachments = bm.readMergeCounters().mFinishedMergeReattachments; - auto HASs = hm.getPublishQueueStates(); + auto HASs = HistoryManager::getPublishQueueStates(app->getConfig()); REQUIRE(HASs.size() == 5); for (auto& has : HASs) { diff --git a/src/bucket/test/BucketTestUtils.cpp b/src/bucket/test/BucketTestUtils.cpp index e56fe35b2e..34122c63d2 100644 --- a/src/bucket/test/BucketTestUtils.cpp +++ b/src/bucket/test/BucketTestUtils.cpp @@ -101,6 +101,10 @@ closeLedger(Application& app, std::optional skToSignValue, app.getHerder().externalizeValue(TxSetXDRFrame::makeEmpty(lcl), ledgerNum, lcl.header.scpValue.closeTime, upgrades, skToSignValue); + testutil::crankUntil( + app, + [&lm, ledgerNum]() { return lm.getLastClosedLedgerNum() == ledgerNum; }, + std::chrono::seconds(10)); return lm.getLastClosedLedgerHeader().hash; } @@ -232,7 +236,9 @@ LedgerManagerForBucketTests::transferLedgerEntriesToBucketList( auto evictedState = mApp.getBucketManager().resolveBackgroundEvictionScan( - ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers); + ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers, + mApp.getLedgerManager() + .getMutableSorobanNetworkConfig()); if (protocolVersionStartsFrom( initialLedgerVers, diff --git a/src/catchup/ApplyCheckpointWork.cpp b/src/catchup/ApplyCheckpointWork.cpp index 8cdafece84..ed950a2672 100644 --- a/src/catchup/ApplyCheckpointWork.cpp +++ b/src/catchup/ApplyCheckpointWork.cpp @@ -31,13 +31,13 @@ ApplyCheckpointWork::ApplyCheckpointWork(Application& app, BasicWork::RETRY_NEVER) , mDownloadDir(downloadDir) , mLedgerRange(range) - , mCheckpoint( - app.getHistoryManager().checkpointContainingLedger(range.mFirst)) + , mCheckpoint(HistoryManager::checkpointContainingLedger(range.mFirst, + app.getConfig())) , mOnFailure(cb) { // Ledger range 
check to enforce application of a single checkpoint - auto const& hm = mApp.getHistoryManager(); - auto low = hm.firstLedgerInCheckpointContaining(mCheckpoint); + auto low = HistoryManager::firstLedgerInCheckpointContaining( + mCheckpoint, mApp.getConfig()); if (mLedgerRange.mFirst != low) { throw std::runtime_error( diff --git a/src/catchup/ApplyLedgerWork.cpp b/src/catchup/ApplyLedgerWork.cpp index 5d910f8bf5..bba96df816 100644 --- a/src/catchup/ApplyLedgerWork.cpp +++ b/src/catchup/ApplyLedgerWork.cpp @@ -23,7 +23,8 @@ BasicWork::State ApplyLedgerWork::onRun() { ZoneScoped; - mApp.getLedgerManager().closeLedger(mLedgerCloseData); + mApp.getLedgerManager().closeLedger(mLedgerCloseData, + /* externalize */ false); return BasicWork::State::WORK_SUCCESS; } diff --git a/src/catchup/CatchupManager.h b/src/catchup/CatchupManager.h index a6f5344880..f0b38d0ec2 100644 --- a/src/catchup/CatchupManager.h +++ b/src/catchup/CatchupManager.h @@ -49,11 +49,18 @@ class CatchupManager friend CatchupMetrics operator-(CatchupMetrics const& x, CatchupMetrics const& y); }; + + enum class ProcessLedgerResult + { + PROCESSED_ALL_LEDGERS_SEQUENTIALLY, + WAIT_TO_APPLY_BUFFERED_OR_CATCHUP + }; static std::unique_ptr create(Application& app); // Process ledgers that could not be applied, and determine if catchup // should run - virtual void processLedger(LedgerCloseData const& ledgerData) = 0; + virtual ProcessLedgerResult processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) = 0; // Forcibly switch the application into catchup mode, treating `toLedger` // as the destination ledger number and count as the number of past ledgers @@ -103,6 +110,8 @@ class CatchupManager // heard of. virtual uint32_t getLargestLedgerSeqHeard() const = 0; + virtual uint32_t getMaxScheduledToApply() = 0; + // Ensure any metrics that are "current state" gauge-like counters reflect // the current reality as best as possible. virtual void syncMetrics() = 0; diff --git a/src/catchup/CatchupManagerImpl.cpp b/src/catchup/CatchupManagerImpl.cpp index 9c101066cf..98985df685 100644 --- a/src/catchup/CatchupManagerImpl.cpp +++ b/src/catchup/CatchupManagerImpl.cpp @@ -8,6 +8,7 @@ #include "util/asio.h" #include "catchup/CatchupManagerImpl.h" #include "catchup/CatchupConfiguration.h" +#include "herder/Herder.h" #include "history/FileTransferInfo.h" #include "ledger/LedgerManager.h" #include "main/Application.h" @@ -23,6 +24,8 @@ namespace stellar { +const uint32_t CatchupManagerImpl::MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT = 12; + CatchupManagerImpl::CatchupMetrics::CatchupMetrics() : mHistoryArchiveStatesDownloaded{0} , mCheckpointsDownloaded{0} @@ -72,7 +75,8 @@ findFirstCheckpoint(T begin, T end, HistoryManager const& hm) { return std::find_if(begin, end, [&hm](std::pair const& kvp) { - return hm.isFirstLedgerInCheckpoint(kvp.first); + return HistoryManager::isFirstLedgerInCheckpoint( + kvp.first, hm.getConfig()); }); } @@ -89,6 +93,7 @@ CatchupManagerImpl::CatchupManagerImpl(Application& app) app.getMetrics().NewCounter({"ledger", "memory", "queued-ledgers"})) , mLargestLedgerSeqHeard(0) { + releaseAssert(threadIsMain()); } CatchupManagerImpl::~CatchupManagerImpl() @@ -98,14 +103,19 @@ CatchupManagerImpl::~CatchupManagerImpl() uint32_t CatchupManagerImpl::getCatchupCount() { + releaseAssert(threadIsMain()); return mApp.getConfig().CATCHUP_COMPLETE ? 
std::numeric_limits::max() : mApp.getConfig().CATCHUP_RECENT; } -void -CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) +CatchupManager::ProcessLedgerResult +CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) { + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + ZoneScoped; if (catchupWorkIsDone()) { @@ -119,26 +129,33 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) logAndUpdateCatchupStatus(true); } + // Always skip old ledgers uint32_t lastReceivedLedgerSeq = ledgerData.getLedgerSeq(); + if (lastReceivedLedgerSeq <= *mLastQueuedToApply) + { + // If last queued to apply is already at-or-ahead of the ledger we just + // received from the network, we're up to date. Return early, nothing to + // do. + CLOG_INFO( + Ledger, + "Skipping close ledger: local state is {}, more recent than {}", + *mLastQueuedToApply, ledgerData.getLedgerSeq()); + return ProcessLedgerResult::PROCESSED_ALL_LEDGERS_SEQUENTIALLY; + } + + // Always add a newer ledger, maybe apply + mSyncingLedgers.emplace(lastReceivedLedgerSeq, ledgerData); mLargestLedgerSeqHeard = std::max(mLargestLedgerSeqHeard, lastReceivedLedgerSeq); // 1. CatchupWork is not running yet - // 2. CatchupManager received ledger that was immediately applied by + // 2. CatchupManager received ledger that should be immediately applied by // LedgerManager: check if we have any sequential ledgers. // If so, attempt to apply mSyncingLedgers and possibly get back in sync - if (!mCatchupWork && lastReceivedLedgerSeq == - mApp.getLedgerManager().getLastClosedLedgerNum()) + if (!mCatchupWork && lastReceivedLedgerSeq == *mLastQueuedToApply + 1) { tryApplySyncingLedgers(); - return; - } - else if (lastReceivedLedgerSeq <= - mApp.getLedgerManager().getLastClosedLedgerNum()) - { - // If LCL is already at-or-ahead of the ledger we just received from the - // network, we're up to date. Return early, nothing to do. - return; + return ProcessLedgerResult::PROCESSED_ALL_LEDGERS_SEQUENTIALLY; } // For the rest of this method: we know LCL has fallen behind the network @@ -151,6 +168,9 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) // to history and commence catchup, running the (checkpoint-driven) catchup // state machine to ledger L-1 (the end of the checkpoint covering K) and // then replay buffered ledgers from L onwards. + CLOG_INFO(Ledger, + "Close of ledger {} buffered. mSyncingLedgers has {} ledgers", + ledgerData.getLedgerSeq(), mSyncingLedgers.size()); // First: if CatchupWork has started, just buffer and return early. if (mCatchupWork) @@ -160,17 +180,17 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) auto const& config = mCatchupWork->getCatchupConfiguration(); if (ledgerData.getLedgerSeq() <= config.toLedger()) { - return; + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } - addAndTrimSyncingLedgers(ledgerData); + trimSyncingLedgers(); logAndUpdateCatchupStatus(true); - return; + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } // Next, we buffer every out of sync ledger to allow us to get back in sync // in case the ledgers we're missing are received. - addAndTrimSyncingLedgers(ledgerData); + trimSyncingLedgers(); // Finally we wait some number of ledgers beyond the smallest buffered // checkpoint ledger before we trigger the CatchupWork. This could be any @@ -178,14 +198,14 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) // after the first buffered one. 
Since we can receive out of order ledgers, // we just check for any ledger larger than the checkpoint - auto& hm = mApp.getHistoryManager(); - std::string message; uint32_t firstLedgerInBuffer = mSyncingLedgers.begin()->first; uint32_t lastLedgerInBuffer = mSyncingLedgers.crbegin()->first; if (mApp.getConfig().modeDoesCatchupWithBucketList() && - hm.isFirstLedgerInCheckpoint(firstLedgerInBuffer) && - firstLedgerInBuffer < lastLedgerInBuffer) + HistoryManager::isFirstLedgerInCheckpoint(firstLedgerInBuffer, + mApp.getConfig()) && + firstLedgerInBuffer < lastLedgerInBuffer && + !mApp.getLedgerManager().isApplying()) { // No point in processing ledgers as catchup won't ever be able to // succeed @@ -207,16 +227,25 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) { // get the smallest checkpoint we need to start catchup uint32_t requiredFirstLedgerInCheckpoint = - hm.isFirstLedgerInCheckpoint(firstLedgerInBuffer) + HistoryManager::isFirstLedgerInCheckpoint(firstLedgerInBuffer, + mApp.getConfig()) ? firstLedgerInBuffer - : hm.firstLedgerAfterCheckpointContaining(firstLedgerInBuffer); + : HistoryManager::firstLedgerAfterCheckpointContaining( + firstLedgerInBuffer, mApp.getConfig()); - uint32_t catchupTriggerLedger = - hm.ledgerToTriggerCatchup(requiredFirstLedgerInCheckpoint); + uint32_t catchupTriggerLedger = HistoryManager::ledgerToTriggerCatchup( + requiredFirstLedgerInCheckpoint, mApp.getConfig()); + if (mApp.getLedgerManager().isApplying()) + { + message = + fmt::format(FMT_STRING("Waiting for ledger {:d} application to " + "complete before starting catchup"), + getMaxScheduledToApply()); + } // If the trigger ledger is behind the last ledger, that means we're // waiting for out of order ledgers, which should arrive quickly - if (catchupTriggerLedger > lastLedgerInBuffer) + else if (catchupTriggerLedger > lastLedgerInBuffer) { auto eta = (catchupTriggerLedger - lastLedgerInBuffer) * mApp.getConfig().getExpectedLedgerCloseTime(); @@ -233,6 +262,7 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) } } logAndUpdateCatchupStatus(true, message); + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } void @@ -241,7 +271,10 @@ CatchupManagerImpl::startCatchup( std::set> bucketsToRetain) { ZoneScoped; - auto lastClosedLedger = mApp.getLedgerManager().getLastClosedLedgerNum(); + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + + auto lastClosedLedger = *mLastQueuedToApply; if ((configuration.toLedger() != CatchupConfiguration::CURRENT) && (configuration.toLedger() <= lastClosedLedger)) { @@ -250,13 +283,6 @@ CatchupManagerImpl::startCatchup( configuration.toLedger(), lastClosedLedger)); } - if (configuration.localBucketsOnly() != - mApp.getLedgerManager().rebuildingInMemoryState()) - { - throw std::invalid_argument( - "Local catchup is only valid when rebuilding ledger state"); - } - // Offline and local catchup types aren't triggered by buffered ledgers auto offlineCatchup = configuration.offline() || configuration.localBucketsOnly(); @@ -273,12 +299,14 @@ CatchupManagerImpl::startCatchup( std::string CatchupManagerImpl::getStatus() const { + releaseAssert(threadIsMain()); return mCatchupWork ? 
mCatchupWork->getStatus() : std::string{}; } BasicWork::State CatchupManagerImpl::getCatchupWorkState() const { + releaseAssert(threadIsMain()); releaseAssert(mCatchupWork); return mCatchupWork->getState(); } @@ -286,12 +314,14 @@ CatchupManagerImpl::getCatchupWorkState() const bool CatchupManagerImpl::catchupWorkIsDone() const { + releaseAssert(threadIsMain()); return mCatchupWork && mCatchupWork->isDone(); } bool CatchupManagerImpl::isCatchupInitialized() const { + releaseAssert(threadIsMain()); return mCatchupWork != nullptr; } @@ -299,6 +329,7 @@ void CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous, std::string const& message) { + releaseAssert(threadIsMain()); if (!message.empty()) { auto contiguousString = @@ -323,16 +354,20 @@ CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous, void CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous) { + releaseAssert(threadIsMain()); logAndUpdateCatchupStatus(contiguous, getStatus()); } std::optional CatchupManagerImpl::maybeGetNextBufferedLedgerToApply() { + releaseAssert(threadIsMain()); + // Since we just applied a ledger, refresh mLastQueuedToApply + updateLastQueuedToApply(); + trimSyncingLedgers(); if (!mSyncingLedgers.empty() && - mSyncingLedgers.begin()->first == - mApp.getLedgerManager().getLastClosedLedgerNum() + 1) + mSyncingLedgers.begin()->first == *mLastQueuedToApply + 1) { return std::make_optional( mSyncingLedgers.begin()->second); @@ -346,6 +381,7 @@ CatchupManagerImpl::maybeGetNextBufferedLedgerToApply() std::optional CatchupManagerImpl::maybeGetLargestBufferedLedger() { + releaseAssert(threadIsMain()); if (!mSyncingLedgers.empty()) { return std::make_optional( @@ -360,29 +396,45 @@ CatchupManagerImpl::maybeGetLargestBufferedLedger() uint32_t CatchupManagerImpl::getLargestLedgerSeqHeard() const { + releaseAssert(threadIsMain()); return mLargestLedgerSeqHeard; } +uint32_t +CatchupManagerImpl::getMaxScheduledToApply() +{ + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + return *mLastQueuedToApply; +} + void CatchupManagerImpl::syncMetrics() { + releaseAssert(threadIsMain()); mSyncingLedgersSize.set_count(mSyncingLedgers.size()); } void -CatchupManagerImpl::addAndTrimSyncingLedgers(LedgerCloseData const& ledgerData) +CatchupManagerImpl::updateLastQueuedToApply() { - mSyncingLedgers.emplace(ledgerData.getLedgerSeq(), ledgerData); - trimSyncingLedgers(); - - CLOG_INFO(Ledger, - "Close of ledger {} buffered. mSyncingLedgers has {} ledgers", - ledgerData.getLedgerSeq(), mSyncingLedgers.size()); + releaseAssert(threadIsMain()); + if (!mLastQueuedToApply) + { + mLastQueuedToApply = mApp.getLedgerManager().getLastClosedLedgerNum(); + } + else + { + mLastQueuedToApply = + std::max(*mLastQueuedToApply, + mApp.getLedgerManager().getLastClosedLedgerNum()); + } } void CatchupManagerImpl::startOnlineCatchup() { + releaseAssert(threadIsMain()); releaseAssert(mSyncingLedgers.size() > 1); // catchup just before first buffered ledger that way we will have a @@ -399,7 +451,7 @@ CatchupManagerImpl::startOnlineCatchup() void CatchupManagerImpl::trimSyncingLedgers() { - + releaseAssert(threadIsMain()); auto removeLedgersLessThan = [&](uint32_t ledger) { // lower_bound returns an iterator pointing to the first element whose // key is not considered to go before k. Thus we get the iterator to @@ -408,12 +460,12 @@ CatchupManagerImpl::trimSyncingLedgers() // This erases [begin, it). 
mSyncingLedgers.erase(mSyncingLedgers.begin(), it); }; - removeLedgersLessThan(mApp.getLedgerManager().getLastClosedLedgerNum() + 1); - auto& hm = mApp.getHistoryManager(); + removeLedgersLessThan(*mLastQueuedToApply + 1); if (!mSyncingLedgers.empty()) { auto const lastBufferedLedger = mSyncingLedgers.rbegin()->first; - if (hm.isFirstLedgerInCheckpoint(lastBufferedLedger)) + if (HistoryManager::isFirstLedgerInCheckpoint(lastBufferedLedger, + mApp.getConfig())) { // The last ledger is the first ledger in the checkpoint. // This means that nodes may not have started publishing @@ -421,7 +473,8 @@ CatchupManagerImpl::trimSyncingLedgers() // We should only keep lastBufferedLedger _and_ the checkpoint // before that. removeLedgersLessThan( - hm.firstLedgerInCheckpointContaining(lastBufferedLedger - 1)); + HistoryManager::firstLedgerInCheckpointContaining( + lastBufferedLedger - 1, mApp.getConfig())); } else { @@ -430,7 +483,8 @@ CatchupManagerImpl::trimSyncingLedgers() // the checkpoint of lastBufferedLedger. // Therefore, we will delete all ledgers before the checkpoint. removeLedgersLessThan( - hm.firstLedgerInCheckpointContaining(lastBufferedLedger)); + HistoryManager::firstLedgerInCheckpointContaining( + lastBufferedLedger, mApp.getConfig())); } } } @@ -439,8 +493,9 @@ void CatchupManagerImpl::tryApplySyncingLedgers() { ZoneScoped; - auto const& ledgerHeader = - mApp.getLedgerManager().getLastClosedLedgerHeader(); + releaseAssert(threadIsMain()); + uint32_t nextToClose = *mLastQueuedToApply + 1; + auto lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); // We can apply multiple ledgers here, which might be slow. This is a rare // occurrence so we should be fine. @@ -450,16 +505,47 @@ CatchupManagerImpl::tryApplySyncingLedgers() auto const& lcd = it->second; // we still have a missing ledger - if (ledgerHeader.header.ledgerSeq + 1 != lcd.getLedgerSeq()) + if (nextToClose != lcd.getLedgerSeq()) + { + break; + } + + // If we have too many ledgers queued to apply, just stop scheduling + // more and let the node gracefully go into catchup. 
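+ // (For example, with MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT = 12: once the + // next ledger to schedule is 12 or more ahead of LCL, no further ledgers + // are queued and the node falls back to catchup.)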
+ releaseAssert(mLastQueuedToApply >= lcl); + if (nextToClose - lcl >= MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT) { + CLOG_INFO(History, + "Next ledger to apply is {}, but LCL {} is too far " + "behind, waiting", + nextToClose, lcl); break; } - mApp.getLedgerManager().closeLedger(lcd); - CLOG_INFO(History, "Closed buffered ledger: {}", - LedgerManager::ledgerAbbrev(ledgerHeader)); + if (mApp.getConfig().parallelLedgerClose()) + { + // Notify LM that application has started + mApp.getLedgerManager().beginApply(); + mApp.postOnLedgerCloseThread( + [&app = mApp, lcd]() { + // No-op if app is shutting down + if (app.isStopping()) + { + return; + } + app.getLedgerManager().closeLedger(lcd, + /* externalize */ true); + }, + "closeLedger queue"); + } + else + { + mApp.getLedgerManager().closeLedger(lcd, /* externalize */ true); + } + mLastQueuedToApply = lcd.getLedgerSeq(); ++it; + ++nextToClose; } mSyncingLedgers.erase(mSyncingLedgers.cbegin(), it); @@ -468,35 +554,41 @@ void CatchupManagerImpl::historyArchiveStatesDownloaded(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mHistoryArchiveStatesDownloaded += num; } void CatchupManagerImpl::ledgersVerified(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mLedgersVerified += num; } void CatchupManagerImpl::ledgerChainsVerificationFailed(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mLedgerChainsVerificationFailed += num; } void CatchupManagerImpl::bucketsApplied(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mBucketsApplied += num; } void CatchupManagerImpl::txSetsApplied(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mTxSetsApplied += num; } void CatchupManagerImpl::fileDownloaded(FileType type, uint32_t num) { + releaseAssert(threadIsMain()); if (type == FileType::HISTORY_FILE_TYPE_BUCKET) { mMetrics.mBucketsDownloaded += num; diff --git a/src/catchup/CatchupManagerImpl.h b/src/catchup/CatchupManagerImpl.h index b02876c7c7..1b049c33b6 100644 --- a/src/catchup/CatchupManagerImpl.h +++ b/src/catchup/CatchupManagerImpl.h @@ -22,6 +22,11 @@ class Work; class CatchupManagerImpl : public CatchupManager { + // Maximum number of ledgers that can be queued to apply (this only applies + // when Config.parallelLedgerClose() == true). If this number is exceeded, + // core stops scheduling new ledgers to apply, and goes into catchup mode. + static uint32_t const MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT; + Application& mApp; std::shared_ptr mCatchupWork; @@ -44,12 +49,26 @@ class CatchupManagerImpl : public CatchupManager std::map mSyncingLedgers; medida::Counter& mSyncingLedgersSize; - void addAndTrimSyncingLedgers(LedgerCloseData const& ledgerData); + // Conceptually, there are three ledger sequences that LedgerManager, Herder + // and CatchupManager rely on: + // - L (mLargestLedgerSeqHeard) = maximum ledger that core heard the + // network externalize, may or may not be applied. + // - Q (mLastQueuedToApply) = Only applicable when + // mConfig.parallelLedgerClose() == true. Maximum ledger that was + // externalized by the network and passed to background thread for + // application. + // - LCL = last closed ledger, the last ledger that was externalized, and + // applied by core. + // - Core maintains the following invariant: LCL <= Q <= L. Eventually, + // every externalized ledger will be applied.
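+ // (Illustrative example: with parallel ledger close enabled, a node might + // have LCL = 100 while ledgers 101..103 are queued for apply (Q = 103) and + // the network has externalized up to L = 105.)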
+ std::optional mLastQueuedToApply; + uint32_t mLargestLedgerSeqHeard; + + void updateLastQueuedToApply(); void startOnlineCatchup(); void trimSyncingLedgers(); void tryApplySyncingLedgers(); uint32_t getCatchupCount(); - uint32_t mLargestLedgerSeqHeard; CatchupMetrics mMetrics; // Check if catchup can't be performed due to local version incompatibility @@ -61,7 +80,8 @@ class CatchupManagerImpl : public CatchupManager CatchupManagerImpl(Application& app); ~CatchupManagerImpl() override; - void processLedger(LedgerCloseData const& ledgerData) override; + ProcessLedgerResult processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) override; void startCatchup( CatchupConfiguration configuration, std::shared_ptr archive, @@ -80,6 +100,7 @@ class CatchupManagerImpl : public CatchupManager std::optional maybeGetNextBufferedLedgerToApply() override; std::optional maybeGetLargestBufferedLedger() override; uint32_t getLargestLedgerSeqHeard() const override; + uint32_t getMaxScheduledToApply() override; void syncMetrics() override; @@ -114,6 +135,14 @@ class CatchupManagerImpl : public CatchupManager { return mCatchupFatalFailure; } + + std::optional mMaxExternalizeApplyBuffer; + uint32_t + getMaxExternalizeApplyBuffer() + { + return mMaxExternalizeApplyBuffer ? *mMaxExternalizeApplyBuffer + : MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT; + } #endif }; } diff --git a/src/catchup/CatchupWork.cpp b/src/catchup/CatchupWork.cpp index e7434bdf0b..89fc839791 100644 --- a/src/catchup/CatchupWork.cpp +++ b/src/catchup/CatchupWork.cpp @@ -90,10 +90,6 @@ CatchupWork::CatchupWork(Application& app, CLOG_INFO(History, "CatchupWork: selected archive {}", mArchive->getName()); } - - // Local catchup is only valid if core is rebuilding state - releaseAssert(mCatchupConfiguration.localBucketsOnly() == - mApp.getLedgerManager().rebuildingInMemoryState()); } CatchupWork::~CatchupWork() @@ -323,8 +319,8 @@ CatchupWork::getAndMaybeSetHistoryArchiveState() mCatchupConfiguration.toLedger() == CatchupConfiguration::CURRENT ? CatchupConfiguration::CURRENT - : mApp.getHistoryManager().checkpointContainingLedger( - mCatchupConfiguration.toLedger()); + : HistoryManager::checkpointContainingLedger( + mCatchupConfiguration.toLedger(), mApp.getConfig()); // Set retries to 10 to ensure we retry enough in case current // checkpoint isn't published yet mGetHistoryArchiveStateWork = addWork( @@ -528,7 +524,7 @@ CatchupWork::runCatchupStep() // In this case we should actually have been caught-up during // the replay process and, if judged successful, our LCL should // be the one provided as well. 
- auto& lastClosed = + auto lastClosed = mApp.getLedgerManager().getLastClosedLedgerHeader(); releaseAssert(mLastApplied.hash == lastClosed.hash); releaseAssert(mLastApplied.header == lastClosed.header); @@ -575,9 +571,8 @@ CatchupWork::runCatchupStep() return true; } - auto checkpoint = - app.getHistoryManager().checkpointContainingLedger( - ledgerSeq); + auto checkpoint = HistoryManager::checkpointContainingLedger( + ledgerSeq, app.getConfig()); auto ft = FileTransferInfo( dir, FileType::HISTORY_FILE_TYPE_LEDGER, checkpoint); diff --git a/src/catchup/ReplayDebugMetaWork.cpp b/src/catchup/ReplayDebugMetaWork.cpp index 2d2dcd7fde..6fcbefc13e 100644 --- a/src/catchup/ReplayDebugMetaWork.cpp +++ b/src/catchup/ReplayDebugMetaWork.cpp @@ -164,7 +164,8 @@ ReplayDebugMetaWork::applyLastLedger() if (lcl + 1 == debugTxSet.ledgerSeq) { mApp.getLedgerManager().closeLedger( - LedgerCloseData::toLedgerCloseData(debugTxSet)); + LedgerCloseData::toLedgerCloseData(debugTxSet), + /* externalize */ false); } else { diff --git a/src/database/Database.cpp b/src/database/Database.cpp index e6288ee6e3..29738ba7a6 100644 --- a/src/database/Database.cpp +++ b/src/database/Database.cpp @@ -216,7 +216,7 @@ Database::applySchemaUpgrade(unsigned long vers) Upgrades::dropSupportUpgradeHistory(*this); break; case 24: - getSession() << "DROP TABLE IF EXISTS pubsub;"; + getRawSession() << "DROP TABLE IF EXISTS pubsub;"; mApp.getPersistentState().migrateToSlotStateTable(); break; default: diff --git a/src/database/test/DatabaseTests.cpp b/src/database/test/DatabaseTests.cpp index c2fc838bd3..62be8787e1 100644 --- a/src/database/test/DatabaseTests.cpp +++ b/src/database/test/DatabaseTests.cpp @@ -32,7 +32,7 @@ transactionTest(Application::pointer app) int a0 = a + 1; int a1 = a + 2; - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); session << "DROP TABLE IF EXISTS test"; session << "CREATE TABLE test (x INTEGER)"; @@ -104,7 +104,7 @@ checkMVCCIsolation(Application::pointer app) int s2r1 = 0, s2r2 = 0, s2r3 = 0, s2r4 = 0; - auto& sess1 = app->getDatabase().getSession(); + auto& sess1 = app->getDatabase().getRawSession(); sess1 << "DROP TABLE IF EXISTS test"; sess1 << "CREATE TABLE test (x INTEGER)"; @@ -217,7 +217,7 @@ TEST_CASE("postgres smoketest", "[db]") Application::pointer app = createTestApplication(clock, cfg); int a = 10, b = 0; - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); SECTION("round trip") { @@ -249,7 +249,7 @@ TEST_CASE("postgres smoketest", "[db]") SECTION("postgres MVCC test") { - app->getDatabase().getSession() << "drop table if exists test"; + app->getDatabase().getRawSession() << "drop table if exists test"; checkMVCCIsolation(app); } } @@ -279,7 +279,7 @@ TEST_CASE("postgres performance", "[db][pgperf][!hide]") try { Application::pointer app = createTestApplication(clock, cfg); - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); session << "drop table if exists txtest;"; session << "create table txtest (a bigint, b bigint, c bigint, primary " @@ -356,6 +356,5 @@ TEST_CASE("schema test", "[db]") auto& db = app->getDatabase(); auto dbv = db.getDBSchemaVersion(); - auto av = db.getAppSchemaVersion(); - REQUIRE(dbv == av); + REQUIRE(dbv == SCHEMA_VERSION); } diff --git a/src/herder/Herder.h b/src/herder/Herder.h index 5ed657b5f3..ba64929101 100644 --- a/src/herder/Herder.h +++ b/src/herder/Herder.h @@ -114,7 +114,8 @@ class Herder // 
restores Herder's state from disk virtual void start() = 0; - virtual void lastClosedLedgerIncreased(bool latest) = 0; + virtual void lastClosedLedgerIncreased(bool latest, + TxSetXDRFrameConstPtr txSet) = 0; // Setup Herder's state to fully participate in consensus virtual void setTrackingSCPState(uint64_t index, StellarValue const& value, @@ -222,5 +223,7 @@ class Herder virtual bool isBannedTx(Hash const& hash) const = 0; virtual TransactionFrameBaseConstPtr getTx(Hash const& hash) const = 0; + + virtual void beginApply() = 0; }; } diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index e62ea4d75a..a3ab30b28b 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -3,6 +3,8 @@ // of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 #include "herder/HerderImpl.h" +#include "bucket/BucketManager.h" +#include "bucket/BucketSnapshotManager.h" #include "crypto/Hex.h" #include "crypto/KeyUtils.h" #include "crypto/SHA.h" @@ -14,9 +16,6 @@ #include "herder/TxSetFrame.h" #include "herder/TxSetUtils.h" #include "ledger/LedgerManager.h" -#include "ledger/LedgerTxn.h" -#include "ledger/LedgerTxnEntry.h" -#include "ledger/LedgerTxnHeader.h" #include "lib/json/json.h" #include "main/Application.h" #include "main/Config.h" @@ -249,10 +248,6 @@ HerderImpl::newSlotExternalized(bool synchronous, StellarValue const& value) // start timing next externalize from this point mLastExternalize = mApp.getClock().now(); - // In order to update the transaction queue we need to get the - // applied transactions. - updateTransactionQueue(mPendingEnvelopes.getTxSet(value.txSetHash)); - // perform cleanups // Evict slots that are outside of our ledger validity bracket auto minSlotToRemember = getMinLedgerSeqToRemember(); @@ -359,7 +354,7 @@ HerderImpl::processExternalized(uint64 slotIndex, StellarValue const& value, writeDebugTxSet(ledgerData); } - mLedgerManager.valueExternalized(ledgerData); + mLedgerManager.valueExternalized(ledgerData, isLatestSlot); } void @@ -434,6 +429,15 @@ recordExternalizeAndCheckCloseTimeDrift( } } +void +HerderImpl::beginApply() +{ + // Tx set might be applied async: in this case, cancel the timer. It'll be + // restarted when the tx set is applied. This is needed to not mess with + // Herder's out of sync recovery mechanism. + mTrackingTimer.cancel(); +} + void HerderImpl::valueExternalized(uint64 slotIndex, StellarValue const& value, bool isLatestSlot) @@ -476,10 +480,6 @@ HerderImpl::valueExternalized(uint64 slotIndex, StellarValue const& value, // Check to see if quorums have changed and we need to reanalyze. checkAndMaybeReanalyzeQuorumMap(); - - // heart beat *after* doing all the work (ensures that we do not include - // the overhead of externalization in the way we track SCP) - trackingHeartBeat(); } else { @@ -1136,16 +1136,31 @@ HerderImpl::safelyProcessSCPQueue(bool synchronous) } void -HerderImpl::lastClosedLedgerIncreased(bool latest) +HerderImpl::lastClosedLedgerIncreased(bool latest, TxSetXDRFrameConstPtr txSet) { + releaseAssert(threadIsMain()); + maybeSetupSorobanQueue( mLedgerManager.getLastClosedLedgerHeader().header.ledgerVersion); // Ensure potential upgrades are handled in overlay maybeHandleUpgrade(); + // In order to update the transaction queue we need to get the + // applied transactions. + updateTransactionQueue(txSet); + + // If we're in sync and there are no buffered ledgers to apply, trigger next + // ledger if (latest) { + // Re-start heartbeat tracking _after_ applying the most up-to-date + // ledger. 
This guarantees the out-of-sync timer won't fire while we have + // ledgers to apply. trackingHeartBeat(); + + // Ensure out of sync recovery did not get triggered while we were + // applying releaseAssert(isTracking()); releaseAssert(trackingConsensusLedgerIndex() == mLedgerManager.getLastClosedLedgerNum()); @@ -1158,6 +1173,10 @@ void HerderImpl::setupTriggerNextLedger() { + // Invariant: core proceeds to vote for the next ledger only when it's _not_ + // applying to ensure block production does not conflict with ledger close. + releaseAssert(!mLedgerManager.isApplying()); + // Invariant: tracking is equal to LCL when we trigger. This helps ensure // core emits SCP messages only for slots it can fully validate // (any closed ledger is fully validated) @@ -1301,8 +1320,8 @@ uint32_t HerderImpl::getMostRecentCheckpointSeq() { auto lastIndex = trackingConsensusLedgerIndex(); - return mApp.getHistoryManager().firstLedgerInCheckpointContaining( - lastIndex); + return HistoryManager::firstLedgerInCheckpointContaining(lastIndex, + mApp.getConfig()); } void @@ -1347,8 +1366,18 @@ HerderImpl::triggerNextLedger(uint32_t ledgerSeqToTrigger, return; } + // If applying, the next ledger will trigger voting + if (mLedgerManager.isApplying()) + { + CLOG_DEBUG(Herder, "triggerNextLedger: skipping (applying) : {}", + mApp.getStateHuman()); + return; + } + // our first choice for this round's set is all the tx we have collected // during last few ledger closes + // Since we are not currently applying, it is safe to use read-only LCL, as + // it's guaranteed to be up-to-date auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); TxSetPhaseTransactions txPhases; txPhases.emplace_back(mTransactionQueue.getTransactions(lcl.header)); @@ -1531,7 +1560,7 @@ HerderImpl::getUpgradesJson() void HerderImpl::forceSCPStateIntoSyncWithLastClosedLedger() { - auto const& header = mLedgerManager.getLastClosedLedgerHeader().header; + auto header = mLedgerManager.getLastClosedLedgerHeader().header; setTrackingSCPState(header.ledgerSeq, header.scpValue, /* isTrackingNetwork */ true); } @@ -2262,6 +2291,7 @@ HerderImpl::purgeOldPersistedTxSets() void HerderImpl::trackingHeartBeat() { + releaseAssert(threadIsMain()); if (mApp.getConfig().MANUAL_CLOSE) { return; } @@ -2326,6 +2356,15 @@ void HerderImpl::herderOutOfSync() { ZoneScoped; + // State switch from "tracking" to "out of sync" should only happen if there + // are no ledgers queued to be applied. If there are ledgers queued, it's + // possible the rest of the network is waiting for this node to vote. In + // this case we should _still_ remain in tracking and emit nominations; if + // the node does not hear anything from the network after that, then the + // node can go into out of sync recovery.
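+ // (The asserts below enforce this: out-of-sync handling runs on the main + // thread and is never entered while ledgers are still being applied.)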
+ releaseAssert(threadIsMain()); + releaseAssert(!mLedgerManager.isApplying()); + CLOG_WARNING(Herder, "Lost track of consensus"); auto s = getJsonInfo(20).toStyledString(); diff --git a/src/herder/HerderImpl.h b/src/herder/HerderImpl.h index be1d3d8e12..015cde5e8f 100644 --- a/src/herder/HerderImpl.h +++ b/src/herder/HerderImpl.h @@ -75,7 +75,8 @@ class HerderImpl : public Herder void start() override; - void lastClosedLedgerIncreased(bool latest) override; + void lastClosedLedgerIncreased(bool latest, + TxSetXDRFrameConstPtr txSet) override; SCP& getSCP(); HerderSCPDriver& @@ -195,6 +196,8 @@ class HerderImpl : public Herder xdr::xvector const& upgrades, SecretKey const& s) override; + virtual void beginApply() override; + void startTxSetGCTimer(); #ifdef BUILD_TESTS diff --git a/src/herder/HerderPersistence.h b/src/herder/HerderPersistence.h index e5d44b0cc5..ea82cd747b 100644 --- a/src/herder/HerderPersistence.h +++ b/src/herder/HerderPersistence.h @@ -49,6 +49,5 @@ class HerderPersistence static void dropAll(Database& db); static void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count); - static void createQuorumTrackingTable(soci::session& sess); }; } diff --git a/src/herder/HerderPersistenceImpl.cpp b/src/herder/HerderPersistenceImpl.cpp index bd743e8072..8f7977212a 100644 --- a/src/herder/HerderPersistenceImpl.cpp +++ b/src/herder/HerderPersistenceImpl.cpp @@ -40,6 +40,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, QuorumTracker::QuorumMap const& qmap) { ZoneScoped; + releaseAssert(threadIsMain()); if (envs.empty()) { return; @@ -47,12 +48,13 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto usedQSets = UnorderedMap{}; auto& db = mApp.getDatabase(); + auto& sess = db.getSession(); - soci::transaction txscope(db.getSession()); + soci::transaction txscope(sess.session()); { auto prepClean = db.getPreparedStatement( - "DELETE FROM scphistory WHERE ledgerseq =:l"); + "DELETE FROM scphistory WHERE ledgerseq =:l", sess); auto& st = prepClean.statement(); st.exchange(soci::use(seq)); @@ -92,7 +94,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepEnv = db.getPreparedStatement("INSERT INTO scphistory " "(nodeid, ledgerseq, envelope) VALUES " - "(:n, :l, :e)"); + "(:n, :l, :e)", + sess); auto& st = prepEnv.statement(); st.exchange(soci::use(nodeIDs, "n")); st.exchange(soci::use(seqs, "l")); @@ -124,7 +127,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, std::string qSetHHex(binToHex(qSetH)); auto prep = db.getPreparedStatement( - "UPDATE quoruminfo SET qsethash = :h WHERE nodeid = :id"); + "UPDATE quoruminfo SET qsethash = :h WHERE nodeid = :id", sess); auto& st = prep.statement(); st.exchange(soci::use(qSetHHex)); st.exchange(soci::use(nodeIDStrKey)); @@ -136,7 +139,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, if (st.get_affected_rows() != 1) { auto prepI = db.getPreparedStatement( - "INSERT INTO quoruminfo (nodeid, qsethash) VALUES (:id, :h)"); + "INSERT INTO quoruminfo (nodeid, qsethash) VALUES (:id, :h)", + sess); auto& stI = prepI.statement(); stI.exchange(soci::use(nodeIDStrKey)); stI.exchange(soci::use(qSetHHex)); @@ -158,7 +162,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, uint32_t lastSeenSeq; auto prepSelQSet = db.getPreparedStatement( - "SELECT lastledgerseq FROM scpquorums WHERE qsethash = :h"); + "SELECT lastledgerseq FROM scpquorums WHERE qsethash = :h", sess); auto& stSel = prepSelQSet.statement(); stSel.exchange(soci::into(lastSeenSeq)); stSel.exchange(soci::use(qSetH)); @@ -177,7 +181,8 @@ 
HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepUpQSet = db.getPreparedStatement( "UPDATE scpquorums SET " - "lastledgerseq = :l WHERE qsethash = :h"); + "lastledgerseq = :l WHERE qsethash = :h", + sess); auto& stUp = prepUpQSet.statement(); stUp.exchange(soci::use(seq)); @@ -202,7 +207,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepInsQSet = db.getPreparedStatement( "INSERT INTO scpquorums " "(qsethash, lastledgerseq, qset) VALUES " - "(:h, :l, :v);"); + "(:h, :l, :v);", + sess); auto& stIns = prepInsQSet.statement(); stIns.exchange(soci::use(qSetH)); @@ -230,6 +236,10 @@ HerderPersistence::copySCPHistoryToStream(Database& db, soci::session& sess, XDROutputFileStream& scpHistory) { ZoneScoped; + // TODO: this may conflict with main thread, as this is done in the + // background (this is the case in master today, so can be fixed + // later). + uint32_t begin = ledgerSeq, end = ledgerSeq + ledgerCount; size_t n = 0; @@ -372,38 +382,34 @@ void HerderPersistence::dropAll(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS scphistory"; + db.getRawSession() << "DROP TABLE IF EXISTS scphistory"; - db.getSession() << "DROP TABLE IF EXISTS scpquorums"; + db.getRawSession() << "DROP TABLE IF EXISTS scpquorums"; - db.getSession() << "CREATE TABLE scphistory (" - "nodeid CHARACTER(56) NOT NULL," - "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0)," - "envelope TEXT NOT NULL" - ")"; + db.getRawSession() << "CREATE TABLE scphistory (" + "nodeid CHARACTER(56) NOT NULL," + "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0)," + "envelope TEXT NOT NULL" + ")"; - db.getSession() << "CREATE INDEX scpenvsbyseq ON scphistory(ledgerseq)"; + db.getRawSession() << "CREATE INDEX scpenvsbyseq ON scphistory(ledgerseq)"; - db.getSession() << "CREATE TABLE scpquorums (" - "qsethash CHARACTER(64) NOT NULL," - "lastledgerseq INT NOT NULL CHECK (lastledgerseq >= 0)," - "qset TEXT NOT NULL," - "PRIMARY KEY (qsethash)" - ")"; + db.getRawSession() + << "CREATE TABLE scpquorums (" + "qsethash CHARACTER(64) NOT NULL," + "lastledgerseq INT NOT NULL CHECK (lastledgerseq >= 0)," + "qset TEXT NOT NULL," + "PRIMARY KEY (qsethash)" + ")"; - db.getSession() + db.getRawSession() << "CREATE INDEX scpquorumsbyseq ON scpquorums(lastledgerseq)"; - db.getSession() << "DROP TABLE IF EXISTS quoruminfo"; -} - -void -HerderPersistence::createQuorumTrackingTable(soci::session& sess) -{ - sess << "CREATE TABLE quoruminfo (" - "nodeid CHARACTER(56) NOT NULL," - "qsethash CHARACTER(64) NOT NULL," - "PRIMARY KEY (nodeid))"; + db.getRawSession() << "DROP TABLE IF EXISTS quoruminfo"; + db.getRawSession() << "CREATE TABLE quoruminfo (" + "nodeid CHARACTER(56) NOT NULL," + "qsethash CHARACTER(64) NOT NULL," + "PRIMARY KEY (nodeid))"; } void @@ -411,9 +417,9 @@ HerderPersistence::deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(db.getRawSession(), ledgerSeq, count, "scphistory", "ledgerseq"); - DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(db.getRawSession(), ledgerSeq, count, "scpquorums", "lastledgerseq"); } } diff --git a/src/herder/HerderSCPDriver.cpp b/src/herder/HerderSCPDriver.cpp index 47f1453d11..13f5bde947 100644 --- a/src/herder/HerderSCPDriver.cpp +++ b/src/herder/HerderSCPDriver.cpp @@ -203,6 +203,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, { 
ZoneScoped; uint64_t lastCloseTime; + releaseAssert(threadIsMain()); if (b.ext.v() != STELLAR_VALUE_SIGNED) { CLOG_TRACE(Herder, @@ -220,15 +221,15 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, } } - auto const& lcl = mLedgerManager.getLastClosedLedgerHeader().header; + auto lhhe = mLedgerManager.getLastClosedLedgerHeader(); // when checking close time, start with what we have locally - lastCloseTime = lcl.scpValue.closeTime; + lastCloseTime = lhhe.header.scpValue.closeTime; // if this value is not for our local state, // perform as many checks as we can - if (slotIndex != (lcl.ledgerSeq + 1)) + if (slotIndex != (lhhe.header.ledgerSeq + 1)) { - if (slotIndex == lcl.ledgerSeq) + if (slotIndex == lhhe.header.ledgerSeq) { // previous ledger if (b.closeTime != lastCloseTime) @@ -239,7 +240,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, return SCPDriver::kInvalidValue; } } - else if (slotIndex < lcl.ledgerSeq) + else if (slotIndex < lhhe.header.ledgerSeq) { // basic sanity check on older value if (b.closeTime >= lastCloseTime) @@ -322,7 +323,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, res = SCPDriver::kInvalidValue; } - else if (!checkAndCacheTxSetValid(*txSet, closeTimeOffset)) + else if (!checkAndCacheTxSetValid(*txSet, lhhe, closeTimeOffset)) { CLOG_DEBUG(Herder, "HerderSCPDriver::validateValue i: {} invalid txSet {}", @@ -612,6 +613,7 @@ HerderSCPDriver::combineCandidates(uint64_t slotIndex, std::set aggSet; + releaseAssert(!mLedgerManager.isApplying()); auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); Hash candidatesHash; @@ -1226,11 +1228,11 @@ HerderSCPDriver::wrapStellarValue(StellarValue const& sv) bool HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, + LedgerHeaderHistoryEntry const& lcl, uint64_t closeTimeOffset) const { - auto key = TxSetValidityKey{ - mApp.getLedgerManager().getLastClosedLedgerHeader().hash, - txSet.getContentsHash(), closeTimeOffset, closeTimeOffset}; + auto key = TxSetValidityKey{lcl.hash, txSet.getContentsHash(), + closeTimeOffset, closeTimeOffset}; bool* pRes = mTxSetValidCache.maybeGet(key); if (pRes == nullptr) @@ -1241,8 +1243,7 @@ HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, // might end up with malformed tx set that doesn't refer to the // LCL. 
ApplicableTxSetFrameConstPtr applicableTxSet; - if (txSet.previousLedgerHash() == - mApp.getLedgerManager().getLastClosedLedgerHeader().hash) + if (txSet.previousLedgerHash() == lcl.hash) { applicableTxSet = txSet.prepareForApply(mApp); } @@ -1250,10 +1251,9 @@ HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, bool res = true; if (applicableTxSet == nullptr) { - CLOG_ERROR(Herder, - "validateValue i:{} can't prepare txSet {} for apply", - (mApp.getLedgerManager().getLastClosedLedgerNum() + 1), - hexAbbrev(txSet.getContentsHash())); + CLOG_ERROR( + Herder, "validateValue i:{} can't prepare txSet {} for apply", + (lcl.header.ledgerSeq + 1), hexAbbrev(txSet.getContentsHash())); res = false; } else @@ -1286,6 +1286,7 @@ uint64 HerderSCPDriver::getNodeWeight(NodeID const& nodeID, SCPQuorumSet const& qset, bool const isLocalNode) const { + releaseAssert(!mLedgerManager.isApplying()); Config const& cfg = mApp.getConfig(); bool const unsupportedProtocol = protocolVersionIsBefore( mApp.getLedgerManager() diff --git a/src/herder/HerderSCPDriver.h b/src/herder/HerderSCPDriver.h index f5f74e0365..62443f8e27 100644 --- a/src/herder/HerderSCPDriver.h +++ b/src/herder/HerderSCPDriver.h @@ -241,6 +241,7 @@ class HerderSCPDriver : public SCPDriver uint64_t slotIndex); bool checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, + LedgerHeaderHistoryEntry const& lcl, uint64_t closeTimeOffset) const; }; } diff --git a/src/herder/PendingEnvelopes.cpp b/src/herder/PendingEnvelopes.cpp index 4db92c5619..fe47c72937 100644 --- a/src/herder/PendingEnvelopes.cpp +++ b/src/herder/PendingEnvelopes.cpp @@ -745,7 +745,7 @@ PendingEnvelopes::getQSet(Hash const& hash) else { auto& db = mApp.getDatabase(); - qset = HerderPersistence::getQuorumSet(db, db.getSession(), hash); + qset = HerderPersistence::getQuorumSet(db, db.getRawSession(), hash); } if (qset) { @@ -814,7 +814,7 @@ PendingEnvelopes::rebuildQuorumTrackerState() // see if we had some information for that node auto& db = mApp.getDatabase(); auto h = HerderPersistence::getNodeQuorumSet( - db, db.getSession(), id); + db, db.getRawSession(), id); if (h) { res = getQSet(*h); diff --git a/src/herder/PendingEnvelopes.h b/src/herder/PendingEnvelopes.h index afb194dfce..4ffcdf12a6 100644 --- a/src/herder/PendingEnvelopes.h +++ b/src/herder/PendingEnvelopes.h @@ -62,6 +62,10 @@ class PendingEnvelopes using TxSetFramCacheItem = std::pair; // recent txsets + // Note on thread-safety: the cache must be maintained strictly by the main + // thread Other threads may reference TxSetXDRFrameConstPtr, which is safe, + // because shared_ptr ref counting is thread-safe TxSetXDRFrameConstPtr + // itself is immutable, and thus thread-safe RandomEvictionCache mTxSetCache; // weak references to all known txsets UnorderedMap> mKnownTxSets; diff --git a/src/herder/TxSetFrame.cpp b/src/herder/TxSetFrame.cpp index e4f951f458..dce3cef463 100644 --- a/src/herder/TxSetFrame.cpp +++ b/src/herder/TxSetFrame.cpp @@ -239,8 +239,7 @@ phaseTxsAreValid(TxSetTransactions const& phase, Application& app, // Grab read-only latest ledger state; This is only used to validate tx sets // for LCL+1 LedgerSnapshot ls(app); - ls.getLedgerHeader().currentToModify().ledgerSeq = - app.getLedgerManager().getLastClosedLedgerNum() + 1; + ls.getLedgerHeader().currentToModify().ledgerSeq += 1; for (auto const& tx : phase) { auto txResult = tx->checkValid(app.getAppConnector(), ls, 0, @@ -332,6 +331,8 @@ makeTxSetFromTransactions(TxSetPhaseTransactions const& txPhases, #endif ) { + 
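The thread-safety note above rests on two properties: std::shared_ptr reference counting is atomic, and the pointed-to tx set frame is immutable, so only the cache container itself needs to stay on the main thread. The following is a minimal standalone sketch of that pattern, using a hypothetical ImmutableTxSet type rather than stellar-core's classes:

    #include <map>
    #include <memory>
    #include <string>
    #include <thread>
    #include <vector>

    // Hypothetical stand-in for an immutable tx set; illustrative only.
    struct ImmutableTxSet
    {
        std::string contentsHash;
        std::vector<std::string> txs;
    };

    int
    main()
    {
        // The cache itself is owned and mutated by the main thread only.
        std::map<std::string, std::shared_ptr<ImmutableTxSet const>> cache;
        auto ts = std::make_shared<ImmutableTxSet const>(
            ImmutableTxSet{"abc123", {"tx1", "tx2"}});
        cache.emplace("abc123", ts);

        // Handing a shared_ptr copy to another thread is safe: reference
        // counting in the control block is atomic and the payload is never
        // mutated.
        std::thread worker([snapshot = ts]() {
            auto n = snapshot->txs.size(); // read-only access
            (void)n;
        });
        worker.join();

        // Eviction also happens only on the main thread; the worker's copy
        // keeps the object alive until it is done with it.
        cache.erase("abc123");
        return 0;
    }

Only the thread that owns the container inserts or evicts; other threads merely extend the lifetime of individual entries through their shared_ptr copies.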
releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); releaseAssert(txPhases.size() == invalidTxs.size()); releaseAssert(txPhases.size() <= static_cast(TxSetPhase::PHASE_COUNT)); @@ -473,6 +474,8 @@ makeTxSetFromTransactions(TxSetTransactions txs, Application& app, TxSetTransactions& invalidTxs, bool enforceTxsApplyOrder) { + releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); auto lclHeader = app.getLedgerManager().getLastClosedLedgerHeader(); TxSetPhaseTransactions phases; phases.resize(protocolVersionStartsFrom(lclHeader.header.ledgerVersion, @@ -775,8 +778,13 @@ ApplicableTxSetFrame::ApplicableTxSetFrame(Application& app, bool isGeneralized, , mPhaseInclusionFeeMap(mTxPhases.size()) , mContentsHash(contentsHash) { - releaseAssert(previousLedgerHash == - app.getLedgerManager().getLastClosedLedgerHeader().hash); + // When applying in the background, the same check is performed in + // closeLedger already + if (threadIsMain()) + { + releaseAssert(previousLedgerHash == + app.getLedgerManager().getLastClosedLedgerHeader().hash); + } } ApplicableTxSetFrame::ApplicableTxSetFrame( @@ -875,7 +883,8 @@ ApplicableTxSetFrame::checkValid(Application& app, uint64_t upperBoundCloseTimeOffset) const { ZoneScoped; - auto& lcl = app.getLedgerManager().getLastClosedLedgerHeader(); + releaseAssert(threadIsMain()); + auto const& lcl = app.getLedgerManager().getLastClosedLedgerHeader(); // Start by checking previousLedgerHash if (lcl.hash != mPreviousLedgerHash) @@ -1386,6 +1395,8 @@ void ApplicableTxSetFrame::applySurgePricing(Application& app) { ZoneScoped; + releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); releaseAssert(mTxPhases.size() <= static_cast(TxSetPhase::PHASE_COUNT)); auto const& lclHeader = diff --git a/src/herder/Upgrades.cpp b/src/herder/Upgrades.cpp index e15080df62..07f64d1385 100644 --- a/src/herder/Upgrades.cpp +++ b/src/herder/Upgrades.cpp @@ -690,22 +690,22 @@ Upgrades::timeForUpgrade(uint64_t time) const void Upgrades::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS upgradehistory"; - db.getSession() << "CREATE TABLE upgradehistory (" - "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0), " - "upgradeindex INT NOT NULL, " - "upgrade TEXT NOT NULL, " - "changes TEXT NOT NULL, " - "PRIMARY KEY (ledgerseq, upgradeindex)" - ")"; - db.getSession() + db.getRawSession() << "DROP TABLE IF EXISTS upgradehistory"; + db.getRawSession() << "CREATE TABLE upgradehistory (" + "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0), " + "upgradeindex INT NOT NULL, " + "upgrade TEXT NOT NULL, " + "changes TEXT NOT NULL, " + "PRIMARY KEY (ledgerseq, upgradeindex)" + ")"; + db.getRawSession() << "CREATE INDEX upgradehistbyseq ON upgradehistory (ledgerseq);"; } void Upgrades::dropSupportUpgradeHistory(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS upgradehistory"; + db.getRawSession() << "DROP TABLE IF EXISTS upgradehistory"; } static void diff --git a/src/herder/test/HerderTests.cpp b/src/herder/test/HerderTests.cpp index 0266c276cc..ea35611607 100644 --- a/src/herder/test/HerderTests.cpp +++ b/src/herder/test/HerderTests.cpp @@ -97,22 +97,9 @@ TEST_CASE_VERSIONS("standalone", "[herder][acceptance]") }; auto waitForExternalize = [&]() { - bool stop = false; auto prev = app->getLedgerManager().getLastClosedLedgerNum(); - VirtualTimer checkTimer(*app); - - auto check = [&](asio::error_code const& error) { - REQUIRE(!error); - REQUIRE(app->getLedgerManager().getLastClosedLedgerNum() > - 
prev); - stop = true; - }; - - checkTimer.expires_from_now( - Herder::EXP_LEDGER_TIMESPAN_SECONDS + - std::chrono::seconds(1)); - checkTimer.async_wait(check); - while (!stop) + while (app->getLedgerManager().getLastClosedLedgerNum() <= + prev + 1) { app->getClock().crank(true); } @@ -2549,10 +2536,10 @@ TEST_CASE("SCP State", "[herder]") REQUIRE(sim->getNode(nodeIDs[0]) ->getLedgerManager() - .getLastClosedLedgerNum() == expectedLedger); + .getLastClosedLedgerNum() >= expectedLedger); REQUIRE(sim->getNode(nodeIDs[1]) ->getLedgerManager() - .getLastClosedLedgerNum() == expectedLedger); + .getLastClosedLedgerNum() >= expectedLedger); lcl = sim->getNode(nodeIDs[0]) ->getLedgerManager() @@ -2650,7 +2637,7 @@ TEST_CASE("SCP State", "[herder]") // then let the nodes run a bit more, they should all externalize the // next ledger sim->crankUntil( - [&]() { return sim->haveAllExternalized(expectedLedger + 1, 5); }, + [&]() { return sim->haveAllExternalized(expectedLedger + 2, 6); }, 2 * numLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); // nodes are at least on ledger 7 (some may be on 8) @@ -2659,14 +2646,6 @@ TEST_CASE("SCP State", "[herder]") // All nodes are in sync REQUIRE(sim->getNode(nodeIDs[i])->getState() == Application::State::APP_SYNCED_STATE); - auto const& actual = sim->getNode(nodeIDs[i]) - ->getLedgerManager() - .getLastClosedLedgerHeader() - .header; - if (actual.ledgerSeq == expectedLedger + 1) - { - REQUIRE(actual.previousLedgerHash == lcl.hash); - } } } @@ -2780,8 +2759,8 @@ TEST_CASE("SCP checkpoint", "[catchup][herder]") auto mainNode = simulation->addNode(v0SecretKey, qSet, &cfg1); simulation->startAllNodes(); - auto& hm = mainNode->getHistoryManager(); - auto firstCheckpoint = hm.firstLedgerAfterCheckpointContaining(1); + auto firstCheckpoint = HistoryManager::firstLedgerAfterCheckpointContaining( + 1, mainNode->getConfig()); // Crank until we are halfway through the second checkpoint simulation->crankUntil( @@ -2800,7 +2779,8 @@ TEST_CASE("SCP checkpoint", "[catchup][herder]") mainNode->getConfig().MAX_SLOTS_TO_REMEMBER + 1); auto secondCheckpoint = - hm.firstLedgerAfterCheckpointContaining(firstCheckpoint); + HistoryManager::firstLedgerAfterCheckpointContaining( + firstCheckpoint, mainNode->getConfig()); // Crank until we complete the 2nd checkpoint simulation->crankUntil( @@ -3087,10 +3067,10 @@ TEST_CASE("soroban txs each parameter surge priced", "[soroban][herder]") bool hadSorobanSurgePricing = false; simulation->crankUntil( [&]() { - auto& lclHeader = nodes[0] - ->getLedgerManager() - .getLastClosedLedgerHeader() - .header; + auto const& lclHeader = nodes[0] + ->getLedgerManager() + .getLastClosedLedgerHeader() + .header; auto txSet = nodes[0]->getHerder().getTxSet( lclHeader.scpValue.txSetHash); GeneralizedTransactionSet xdrTxSet; @@ -3204,14 +3184,34 @@ TEST_CASE("overlay parallel processing") { auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE); - // Set threshold to 1 so all have to vote - auto simulation = - Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { - auto cfg = getTestConfig(i); - cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; - cfg.BACKGROUND_OVERLAY_PROCESSING = true; - return cfg; - }); + std::shared_ptr simulation; + + SECTION("background traffic processing") + { + // Set threshold to 1 so all have to vote + simulation = + Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { + auto cfg = getTestConfig(i); + cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; + cfg.BACKGROUND_OVERLAY_PROCESSING = true; + return cfg; 
+ }); + } + SECTION("background ledger close") + { + // Set threshold to 1 so all have to vote + simulation = + Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { + auto cfg = getTestConfig( + i, Config::TESTDB_BUCKET_DB_PERSISTENT_POSTGRES); + cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; + cfg.EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = true; + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(500); + return cfg; + }); + } + simulation->startAllNodes(); auto nodes = simulation->getNodes(); uint32_t desiredTxRate = 1; @@ -3243,15 +3243,15 @@ TEST_CASE("overlay parallel processing") // soroban traffic currLoadGenCount = loadGenDone.count(); auto secondLoadGenCount = secondLoadGenDone.count(); - uint32_t const classicTxCount = 200; + uint32_t const txCount = 100; // Generate Soroban txs from one node loadGen.generateLoad(GeneratedLoadConfig::txLoad( LoadGenMode::SOROBAN_UPLOAD, 50, - /* nTxs */ 500, desiredTxRate, /* offset */ 0)); + /* nTxs */ txCount, desiredTxRate, /* offset */ 0)); // Generate classic txs from another node (with offset to prevent // overlapping accounts) secondLoadGen.generateLoad(GeneratedLoadConfig::txLoad( - LoadGenMode::PAY, 50, classicTxCount, desiredTxRate, + LoadGenMode::PAY, 50, txCount, desiredTxRate, /* offset */ 50)); simulation->crankUntil( @@ -3494,13 +3494,26 @@ checkHerder(Application& app, HerderImpl& herder, Herder::State expectedState, // received by a node, we fully control the state of Herder and LM (and whether // each component is in sync or out of sync) static void -herderExternalizesValuesWithProtocol(uint32_t version) +herderExternalizesValuesWithProtocol(uint32_t version, + bool parallelLedgerClose = false, + uint32_t delayCloseMs = 0) { auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE); auto simulation = std::make_shared( - Simulation::OVER_LOOPBACK, networkID, [version](int i) { - auto cfg = getTestConfig(i, Config::TESTDB_BUCKET_DB_PERSISTENT); + Simulation::OVER_LOOPBACK, networkID, [&](int i) { + auto cfg = getTestConfig( + i, parallelLedgerClose + ? 
Config::TESTDB_BUCKET_DB_PERSISTENT_POSTGRES + : Config::TESTDB_BUCKET_DB_PERSISTENT); cfg.TESTING_UPGRADE_LEDGER_PROTOCOL_VERSION = version; + if (parallelLedgerClose) + { + cfg.EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = true; + // Add artificial delay to ledger close to increase chances of + // conflicts + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(delayCloseMs); + } return cfg; }); @@ -3558,7 +3571,7 @@ herderExternalizesValuesWithProtocol(uint32_t version) [&]() { return simulation->haveAllExternalized(destinationLedger, 100); }, - 2 * nLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); + 10 * nLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); return std::min(currentALedger(), currentCLedger()); }; @@ -3678,6 +3691,7 @@ herderExternalizesValuesWithProtocol(uint32_t version) Herder::ENVELOPE_STATUS_READY); REQUIRE(herder.recvSCPEnvelope(newMsgB.first, qset, newMsgB.second) == Herder::ENVELOPE_STATUS_READY); + simulation->crankForAtLeast(std::chrono::seconds(10), false); }; auto testOutOfOrder = [&](bool partial) { @@ -4258,6 +4272,7 @@ TEST_CASE("do not flood invalid transactions", "[herder]") VirtualClock clock; auto cfg = getTestConfig(); cfg.FLOOD_TX_PERIOD_MS = 1; // flood as fast as possible + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = std::chrono::seconds(0); auto app = createTestApplication(clock, cfg); auto& lm = app->getLedgerManager(); @@ -4311,6 +4326,8 @@ TEST_CASE("do not flood too many soroban transactions", cfg.FLOOD_OP_RATE_PER_LEDGER = 2.0; cfg.FLOOD_SOROBAN_TX_PERIOD_MS = 50; cfg.FLOOD_SOROBAN_RATE_PER_LEDGER = 2.0; + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::seconds(0); return cfg; }); @@ -5280,7 +5297,8 @@ TEST_CASE("SCP message capture from previous ledger", "[herder]") // Prepare query auto& db = node->getDatabase(); auto prep = db.getPreparedStatement( - "SELECT envelope FROM scphistory WHERE ledgerseq = :l"); + "SELECT envelope FROM scphistory WHERE ledgerseq = :l", + db.getSession()); auto& st = prep.statement(); st.exchange(soci::use(ledgerNum)); std::string envStr; diff --git a/src/history/HistoryManager.h b/src/history/HistoryManager.h index 07b6f64fa9..4a17bb795d 100644 --- a/src/history/HistoryManager.h +++ b/src/history/HistoryManager.h @@ -320,13 +320,13 @@ class HistoryManager // getCheckpointFrequency() -- equivalently, the LCL is one _less_ than // a multiple of getCheckpointFrequency(). Returns true if checkpoint // publication of the LCL was queued, otherwise false. - virtual bool maybeQueueHistoryCheckpoint() = 0; + virtual bool maybeQueueHistoryCheckpoint(uint32_t lcl) = 0; // Checkpoint the LCL -- both the log of history from the previous // checkpoint to it, as well as the bucketlist of its state -- to a // publication-queue in the database. This should be followed shortly // (typically after commit) with a call to publishQueuedHistory. - virtual void queueCurrentHistory() = 0; + virtual void queueCurrentHistory(uint32_t lcl) = 0; // Return the youngest ledger still in the outgoing publish queue; // returns 0 if the publish queue has nothing in it.
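For reference, the parallel-close variants of the tests above differ from the defaults in only two knobs: the experimental flag that moves ledger close off the main thread, and an artificial close delay that widens the window in which apply overlaps the next externalize. A minimal sketch, with a hypothetical struct standing in for the test configuration:

    #include <chrono>

    // Hypothetical subset of the test configuration; the real tests set these
    // fields on the Config returned by getTestConfig().
    struct ParallelCloseTestKnobs
    {
        bool EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = false;
        std::chrono::milliseconds ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING{
            0};
    };

    ParallelCloseTestKnobs
    makeParallelCloseKnobs(std::chrono::milliseconds delay)
    {
        ParallelCloseTestKnobs knobs;
        // Run ledger close off the main thread...
        knobs.EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = true;
        // ...and stretch it out so that apply overlaps the next externalize,
        // which is the interleaving these tests try to provoke.
        knobs.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = delay;
        return knobs;
    }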
@@ -341,7 +341,7 @@ class HistoryManager virtual size_t publishQueuedHistory() = 0; // Prepare checkpoint files for publishing - virtual void maybeCheckpointComplete() = 0; + virtual void maybeCheckpointComplete(uint32_t lcl) = 0; // Migrate SQL-based publish queue to the new file format // (one-time call during database schema upgrade path) diff --git a/src/history/HistoryManagerImpl.cpp b/src/history/HistoryManagerImpl.cpp index 4f1932898a..a8d9e23c5c 100644 --- a/src/history/HistoryManagerImpl.cpp +++ b/src/history/HistoryManagerImpl.cpp @@ -375,9 +375,8 @@ HistoryManager::getMaxLedgerQueuedToPublish(Config const& cfg) } bool -HistoryManagerImpl::maybeQueueHistoryCheckpoint() +HistoryManagerImpl::maybeQueueHistoryCheckpoint(uint32_t lcl) { - uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig())) { return false; @@ -390,15 +389,14 @@ HistoryManagerImpl::maybeQueueHistoryCheckpoint() return false; } - queueCurrentHistory(); + queueCurrentHistory(lcl); return true; } void -HistoryManagerImpl::queueCurrentHistory() +HistoryManagerImpl::queueCurrentHistory(uint32_t ledger) { ZoneScoped; - auto ledger = mApp.getLedgerManager().getLastClosedLedgerNum(); LiveBucketList bl; if (mApp.getConfig().MODE_ENABLES_BUCKETLIST) @@ -527,9 +525,8 @@ HistoryManagerImpl::publishQueuedHistory() } void -HistoryManagerImpl::maybeCheckpointComplete() +HistoryManagerImpl::maybeCheckpointComplete(uint32_t lcl) { - uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig()) || !mApp.getHistoryArchiveManager().publishEnabled()) { @@ -702,7 +699,7 @@ HistoryManagerImpl::restoreCheckpoint(uint32_t lcl) }); // Maybe finalize checkpoint if we're at a checkpoint boundary and // haven't rotated yet. 
No-op if checkpoint has been rotated already - maybeCheckpointComplete(); + maybeCheckpointComplete(lcl); } } diff --git a/src/history/HistoryManagerImpl.h b/src/history/HistoryManagerImpl.h index 0ae2ae3378..60c146fd26 100644 --- a/src/history/HistoryManagerImpl.h +++ b/src/history/HistoryManagerImpl.h @@ -46,15 +46,15 @@ class HistoryManagerImpl : public HistoryManager void logAndUpdatePublishStatus() override; - bool maybeQueueHistoryCheckpoint() override; + bool maybeQueueHistoryCheckpoint(uint32_t lcl) override; - void queueCurrentHistory() override; + void queueCurrentHistory(uint32_t lcl) override; void takeSnapshotAndPublish(HistoryArchiveState const& has); size_t publishQueuedHistory() override; - void maybeCheckpointComplete() override; + void maybeCheckpointComplete(uint32_t lcl) override; void dropSQLBasedPublish() override; std::vector diff --git a/src/invariant/BucketListIsConsistentWithDatabase.cpp b/src/invariant/BucketListIsConsistentWithDatabase.cpp index 798059d9de..529077e721 100644 --- a/src/invariant/BucketListIsConsistentWithDatabase.cpp +++ b/src/invariant/BucketListIsConsistentWithDatabase.cpp @@ -7,6 +7,8 @@ #include "bucket/BucketManager.h" #include "bucket/LiveBucket.h" #include "bucket/LiveBucketList.h" +#include "crypto/Hex.h" +#include "database/Database.h" #include "history/HistoryArchive.h" #include "invariant/InvariantManager.h" #include "ledger/LedgerManager.h" @@ -157,7 +159,8 @@ BucketListIsConsistentWithDatabase::checkEntireBucketlist() throw std::runtime_error(s); } - if (mApp.getPersistentState().getState(PersistentState::kDBBackend) != + if (mApp.getPersistentState().getState(PersistentState::kDBBackend, + mApp.getDatabase().getSession()) != BucketIndex::DB_BACKEND_STATE) { throw std::runtime_error( diff --git a/src/ledger/LedgerHeaderUtils.cpp b/src/ledger/LedgerHeaderUtils.cpp index 0835439355..952b0c34d2 100644 --- a/src/ledger/LedgerHeaderUtils.cpp +++ b/src/ledger/LedgerHeaderUtils.cpp @@ -43,7 +43,7 @@ isValid(LedgerHeader const& lh) } void -storeInDatabase(Database& db, LedgerHeader const& header) +storeInDatabase(Database& db, LedgerHeader const& header, SessionWrapper& sess) { ZoneScoped; if (!isValid(header)) @@ -64,7 +64,8 @@ storeInDatabase(Database& db, LedgerHeader const& header) "INSERT INTO ledgerheaders " "(ledgerhash, prevhash, bucketlisthash, ledgerseq, closetime, data) " "VALUES " - "(:h, :ph, :blh, :seq, :ct, :data)"); + "(:h, :ph, :blh, :seq, :ct, :data)", + sess); auto& st = prep.statement(); st.exchange(soci::use(hash)); st.exchange(soci::use(prevHash)); @@ -112,7 +113,8 @@ loadByHash(Database& db, Hash const& hash) std::string headerEncoded; auto prep = db.getPreparedStatement("SELECT data FROM ledgerheaders " - "WHERE ledgerhash = :h"); + "WHERE ledgerhash = :h", + db.getSession()); auto& st = prep.statement(); st.exchange(soci::into(headerEncoded)); st.exchange(soci::use(hash_s)); @@ -144,8 +146,8 @@ loadMaxLedgerSeq(Database& db) ZoneScoped; uint32_t seq = 0; soci::indicator maxIndicator; - auto prep = - db.getPreparedStatement("SELECT MAX(ledgerseq) FROM ledgerheaders"); + auto prep = db.getPreparedStatement( + "SELECT MAX(ledgerseq) FROM ledgerheaders", db.getSession()); auto& st = prep.statement(); st.exchange(soci::into(seq, maxIndicator)); st.define_and_bind(); @@ -188,10 +190,10 @@ loadBySequence(Database& db, soci::session& sess, uint32_t seq) } void -deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) +deleteOldEntries(soci::session& sess, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - 
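Passing the just-closed ledger into the HistoryManager calls, instead of having them read LedgerManager's LCL, keeps the checkpoint decision independent of state that the apply thread may be advancing concurrently. A small illustrative sketch (the 64-ledger checkpoint frequency and the helper name are assumptions for the example, not taken from this patch):

    #include <cassert>
    #include <cstdint>

    // Illustration only: assume the usual 64-ledger checkpoint frequency.
    constexpr uint32_t kCheckpointFrequency = 64;

    // The caller passes in the ledger it just closed, so the answer cannot be
    // skewed by another thread bumping the LCL underneath us.
    bool
    isLastLedgerInCheckpoint(uint32_t justClosedLedger)
    {
        return (justClosedLedger + 1) % kCheckpointFrequency == 0;
    }

    int
    main()
    {
        assert(isLastLedgerInCheckpoint(63));
        assert(!isLastLedgerInCheckpoint(64));
        return 0;
    }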
DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(sess, ledgerSeq, count, "ledgerheaders", "ledgerseq"); } @@ -232,17 +234,17 @@ dropAll(Database& db) { std::string coll = db.getSimpleCollationClause(); - db.getSession() << "DROP TABLE IF EXISTS ledgerheaders;"; - db.getSession() << "CREATE TABLE ledgerheaders (" - << "ledgerhash CHARACTER(64) " << coll - << " PRIMARY KEY," - << "prevhash CHARACTER(64) NOT NULL," - "bucketlisthash CHARACTER(64) NOT NULL," - "ledgerseq INT UNIQUE CHECK (ledgerseq >= 0)," - "closetime BIGINT NOT NULL CHECK (closetime >= 0)," - "data TEXT NOT NULL" - ");"; - db.getSession() + db.getRawSession() << "DROP TABLE IF EXISTS ledgerheaders;"; + db.getRawSession() + << "CREATE TABLE ledgerheaders (" + << "ledgerhash CHARACTER(64) " << coll << " PRIMARY KEY," + << "prevhash CHARACTER(64) NOT NULL," + "bucketlisthash CHARACTER(64) NOT NULL," + "ledgerseq INT UNIQUE CHECK (ledgerseq >= 0)," + "closetime BIGINT NOT NULL CHECK (closetime >= 0)," + "data TEXT NOT NULL" + ");"; + db.getRawSession() << "CREATE INDEX ledgersbyseq ON ledgerheaders ( ledgerseq );"; } } diff --git a/src/ledger/LedgerHeaderUtils.h b/src/ledger/LedgerHeaderUtils.h index e165570e1a..ae50c082d7 100644 --- a/src/ledger/LedgerHeaderUtils.h +++ b/src/ledger/LedgerHeaderUtils.h @@ -18,7 +18,8 @@ uint32_t getFlags(LedgerHeader const& lh); bool isValid(LedgerHeader const& lh); -void storeInDatabase(Database& db, LedgerHeader const& header); +void storeInDatabase(Database& db, LedgerHeader const& header, + SessionWrapper& sess); LedgerHeader decodeFromData(std::string const& data); @@ -29,7 +30,7 @@ std::shared_ptr loadBySequence(Database& db, soci::session& sess, uint32_t loadMaxLedgerSeq(Database& db); -void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count); +void deleteOldEntries(soci::session& sess, uint32_t ledgerSeq, uint32_t count); size_t copyToStream(Database& db, soci::session& sess, uint32_t ledgerSeq, uint32_t ledgerCount, CheckpointBuilder& checkpointBuilder); diff --git a/src/ledger/LedgerManager.h b/src/ledger/LedgerManager.h index 4a181701c4..aaf32b3de8 100644 --- a/src/ledger/LedgerManager.h +++ b/src/ledger/LedgerManager.h @@ -64,6 +64,7 @@ class LedgerManager }; virtual void moveToSynced() = 0; + virtual void beginApply() = 0; virtual State getState() const = 0; virtual std::string getStateHuman() const = 0; @@ -90,7 +91,8 @@ class LedgerManager // close event. This is the most common cause of LedgerManager advancing // from one ledger to the next: the network reached consensus on // `ledgerData`. - virtual void valueExternalized(LedgerCloseData const& ledgerData) = 0; + virtual void valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) = 0; // Return the LCL header and (complete, immutable) hash. virtual LedgerHeaderHistoryEntry const& @@ -101,6 +103,7 @@ class LedgerManager // return the HAS that corresponds to the last closed ledger as persisted in // the database + // This function returns a copy of the latest HAS, so it's thread-safe. virtual HistoryArchiveState getLastClosedLedgerHAS() = 0; // Return the sequence number of the LCL. @@ -174,9 +177,18 @@ class LedgerManager // Forcibly close the current ledger, applying `ledgerData` as the consensus // changes. This is normally done automatically as part of - // `valueExternalized()`; this method is present in the public interface to - // permit testing.
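The note that getLastClosedLedgerHAS() returns a copy is what makes the accessor safe to call while a close is producing the next value: callers get a self-contained snapshot rather than a reference into mutable state. A generic sketch of that idiom, with hypothetical types in place of HistoryArchiveState and LedgerManager:

    #include <cstdint>
    #include <mutex>
    #include <string>

    // Hypothetical stand-in for HistoryArchiveState.
    struct ArchiveState
    {
        uint32_t currentLedger = 0;
        std::string bucketListHash;
    };

    class StateHolder
    {
        mutable std::mutex mMutex;
        ArchiveState mLast;

      public:
        // Returns a copy: callers get an immutable snapshot and never hold a
        // reference into state that a later ledger close may overwrite.
        ArchiveState
        getLast() const
        {
            std::lock_guard<std::mutex> lock(mMutex);
            return mLast;
        }

        void
        setLast(ArchiveState const& s)
        {
            std::lock_guard<std::mutex> lock(mMutex);
            mLast = s;
        }
    };

Returning by value costs one copy per call, which is acceptable for data that changes at most once per ledger.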
- virtual void closeLedger(LedgerCloseData const& ledgerData) = 0; + // `valueExternalized()` during normal operation (in which case + // `calledViaExternalize` should be set to true), but can also be called + // directly by catchup (with `calledViaExternalize` false in this case). + virtual void closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) = 0; +#ifdef BUILD_TESTS + void + closeLedger(LedgerCloseData const& ledgerData) + { + closeLedger(ledgerData, /* externalize */ false); + } +#endif // deletes old entries stored in the database virtual void deleteOldEntries(Database& db, uint32_t ledgerSeq, @@ -192,5 +204,7 @@ class LedgerManager virtual ~LedgerManager() { } + + virtual bool isApplying() const = 0; }; } diff --git a/src/ledger/LedgerManagerImpl.cpp b/src/ledger/LedgerManagerImpl.cpp index cb71f05339..f61f0f8207 100644 --- a/src/ledger/LedgerManagerImpl.cpp +++ b/src/ledger/LedgerManagerImpl.cpp @@ -40,6 +40,7 @@ #include "util/XDRCereal.h" #include "util/XDRStream.h" #include "work/WorkScheduler.h" +#include "xdrpp/printer.h" #include @@ -165,9 +166,20 @@ LedgerManagerImpl::moveToSynced() setState(LM_SYNCED_STATE); } +void +LedgerManagerImpl::beginApply() +{ + // Go into "applying" state, this will prevent catchup from starting + mCurrentlyApplyingLedger = true; + // Notify Herder that application started, so it won't fire out of sync + // timer + mApp.getHerder().beginApply(); +} + void LedgerManagerImpl::setState(State s) { + releaseAssert(threadIsMain()); if (s != getState()) { std::string oldState = getStateHuman(); @@ -238,7 +250,10 @@ LedgerManagerImpl::startNewLedger(LedgerHeader const& genesisLedger) CLOG_INFO(Ledger, "Established genesis ledger, closing"); CLOG_INFO(Ledger, "Root account: {}", skey.getStrKeyPublic()); CLOG_INFO(Ledger, "Root account seed: {}", skey.getStrKeySeed().value); - ledgerClosed(ltx, /*ledgerCloseMeta*/ nullptr, /*initialLedgerVers*/ 0); + auto output = + ledgerClosed(ltx, /*ledgerCloseMeta*/ nullptr, /*initialLedgerVers*/ 0); + updateCurrentLedgerState(output); + ltx.commit(); } @@ -272,8 +287,8 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) ZoneScoped; // Step 1. Load LCL state from the DB and extract latest ledger hash - string lastLedger = - mApp.getPersistentState().getState(PersistentState::kLastClosedLedger); + string lastLedger = mApp.getPersistentState().getState( + PersistentState::kLastClosedLedger, mApp.getDatabase().getSession()); if (lastLedger.empty()) { @@ -284,6 +299,11 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) CLOG_INFO(Ledger, "Last closed ledger (LCL) hash is {}", lastLedger); Hash lastLedgerHash = hexToBin256(lastLedger); + HistoryArchiveState has; + has.fromString(mApp.getPersistentState().getState( + PersistentState::kHistoryArchiveState, + mApp.getDatabase().getSession())); + // Step 2.
Restore LedgerHeader from DB based on the ledger hash derived // earlier, or verify we're at genesis if in no-history mode std::optional latestLedgerHeader; @@ -306,7 +326,7 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) { throw std::runtime_error("Could not load ledger from database"); } - HistoryArchiveState has = getLastClosedLedgerHAS(); + if (currentLedger->ledgerSeq != has.currentLedger) { throw std::runtime_error("Invalid database state: last known " @@ -333,7 +353,6 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) releaseAssert(latestLedgerHeader.has_value()); - HistoryArchiveState has = getLastClosedLedgerHAS(); auto missing = mApp.getBucketManager().checkForMissingBucketsFiles(has); auto pubmissing = mApp.getHistoryManager().getMissingBucketsReferencedByPublishQueue(); @@ -366,7 +385,8 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) } // Step 4. Restore LedgerManager's internal state - advanceLedgerPointers(*latestLedgerHeader); + auto output = advanceLedgerPointers(*latestLedgerHeader, has); + updateCurrentLedgerState(output); // Maybe truncate checkpoint files if we're restarting after a crash // in closeLedger (in which case any modifications to the ledger state have @@ -485,14 +505,8 @@ LedgerManagerImpl::getLastClosedLedgerHeader() const HistoryArchiveState LedgerManagerImpl::getLastClosedLedgerHAS() { - ZoneScoped; releaseAssert(threadIsMain()); - - string hasString = mApp.getPersistentState().getState( - PersistentState::kHistoryArchiveState); - HistoryArchiveState has; - has.fromString(hasString); - return has; + return mLastClosedLedgerHAS; } uint32_t @@ -513,7 +527,6 @@ LedgerManagerImpl::getSorobanNetworkConfigReadOnly() SorobanNetworkConfig const& LedgerManagerImpl::getSorobanNetworkConfigForApply() { - // Must be called from ledger close thread only releaseAssert(mSorobanNetworkConfigForApply); return *mSorobanNetworkConfigForApply; } @@ -597,12 +610,11 @@ LedgerManagerImpl::publishSorobanMetrics() // called by txherder void -LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData) +LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) { ZoneScoped; - - // Capture LCL before we do any processing (which may trigger ledger close) - auto lcl = getLastClosedLedgerNum(); + releaseAssert(threadIsMain()); CLOG_INFO(Ledger, "Got consensus: [seq={}, prev={}, txs={}, ops={}, sv: {}]", @@ -620,67 +632,20 @@ LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData) releaseAssert(false); } - closeLedgerIf(ledgerData); - auto& cm = mApp.getCatchupManager(); - - cm.processLedger(ledgerData); - - // We set the state to synced - // if we have closed the latest ledger we have heard of. - bool appliedLatest = false; - if (cm.getLargestLedgerSeqHeard() == getLastClosedLedgerNum()) - { - setState(LM_SYNCED_STATE); - appliedLatest = true; - } - - // New ledger(s) got closed, notify Herder - if (getLastClosedLedgerNum() > lcl) - { - CLOG_DEBUG(Ledger, - "LedgerManager::valueExternalized LCL advanced {} -> {}", - lcl, getLastClosedLedgerNum()); - mApp.getHerder().lastClosedLedgerIncreased(appliedLatest); - } -} - -void -LedgerManagerImpl::closeLedgerIf(LedgerCloseData const& ledgerData) -{ - ZoneScoped; - if (mLastClosedLedger.header.ledgerSeq + 1 == ledgerData.getLedgerSeq()) - { - auto& cm = mApp.getCatchupManager(); - // if catchup work is running, we don't want ledger manager to close - // this ledger and potentially cause issues. 
- if (cm.isCatchupInitialized() && !cm.catchupWorkIsDone()) - { - CLOG_INFO( - Ledger, - "Can't close ledger: {} in LM because catchup is running", - ledgerAbbrev(mLastClosedLedger)); - return; - } - - closeLedger(ledgerData); - CLOG_INFO(Ledger, "Closed ledger: {}", ledgerAbbrev(mLastClosedLedger)); - } - else if (ledgerData.getLedgerSeq() <= mLastClosedLedger.header.ledgerSeq) - { - CLOG_INFO( - Ledger, - "Skipping close ledger: local state is {}, more recent than {}", - mLastClosedLedger.header.ledgerSeq, ledgerData.getLedgerSeq()); - } - else + auto res = cm.processLedger(ledgerData, isLatestSlot); + // Go into catchup if we have any future ledgers we're unable to apply + // sequentially. + if (res == + CatchupManager::ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP) { if (mState != LM_CATCHING_UP_STATE) { // Out of sync, buffer what we just heard and start catchup. - CLOG_INFO( - Ledger, "Lost sync, local LCL is {}, network closed ledger {}", - mLastClosedLedger.header.ledgerSeq, ledgerData.getLedgerSeq()); + CLOG_INFO(Ledger, + "Lost sync, local LCL is {}, network closed ledger {}", + getLastClosedLedgerHeader().header.ledgerSeq, + ledgerData.getLedgerSeq()); } setState(LM_CATCHING_UP_STATE); @@ -746,6 +711,85 @@ LedgerManagerImpl::emitNextMeta() mNextMetaToEmit.reset(); } +void +maybeSimulateSleep(Config const& cfg, size_t opSize, + LogSlowExecution& closeTime) +{ + if (!cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.empty()) + { + // Sleep for a parameterized amount of time in simulation mode + std::discrete_distribution distribution( + cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.begin(), + cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.end()); + std::chrono::microseconds sleepFor{0}; + for (size_t i = 0; i < opSize; i++) + { + sleepFor += + cfg.OP_APPLY_SLEEP_TIME_DURATION_FOR_TESTING[distribution( + gRandomEngine)]; + } + std::chrono::microseconds applicationTime = + closeTime.checkElapsedTime(); + if (applicationTime < sleepFor) + { + sleepFor -= applicationTime; + CLOG_DEBUG(Perf, "Simulate application: sleep for {} microseconds", + sleepFor.count()); + std::this_thread::sleep_for(sleepFor); + } + } +} + +asio::io_context& +getMetaIOContext(Application& app) +{ + return app.getConfig().parallelLedgerClose() + ? app.getLedgerCloseIOContext() + : app.getClock().getIOContext(); +} + +void +LedgerManagerImpl::ledgerCloseComplete(uint32_t lcl, bool calledViaExternalize, + LedgerCloseData const& ledgerData) +{ + // We just finished applying `lcl`, maybe change LM's state + // Also notify Herder so it can trigger next ledger. 
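valueExternalized() now delegates buffering and sequencing to CatchupManager and only reacts to the result: if the externalized ledger cannot be applied in sequence, LedgerManager records that it is catching up; otherwise the completion path below takes care of moving back to the synced state. A simplified sketch of that branch (the first enumerator and the function name are hypothetical; only WAIT_TO_APPLY_BUFFERED_OR_CATCHUP appears in this patch):

    #include <cstdint>
    #include <iostream>

    enum class ProcessLedgerResult
    {
        APPLIED_ALL_KNOWN_LEDGERS, // hypothetical "everything applied" value
        WAIT_TO_APPLY_BUFFERED_OR_CATCHUP
    };

    enum class LmState
    {
        SYNCED,
        CATCHING_UP
    };

    // If the externalized ledger could not be applied in sequence, remember
    // that we are catching up; otherwise leave the state alone and let the
    // apply-completion path decide when to move back to SYNCED.
    LmState
    reactToProcessLedger(ProcessLedgerResult res, LmState current,
                         uint32_t lcl, uint32_t externalizedSeq)
    {
        if (res == ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP)
        {
            if (current != LmState::CATCHING_UP)
            {
                std::cout << "Lost sync, local LCL is " << lcl
                          << ", network closed ledger " << externalizedSeq
                          << "\n";
            }
            return LmState::CATCHING_UP;
        }
        return current;
    }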
+ + releaseAssert(threadIsMain()); + uint32_t latestHeardFromNetwork = + mApp.getCatchupManager().getLargestLedgerSeqHeard(); + uint32_t latestQueuedToApply = + mApp.getCatchupManager().getMaxScheduledToApply(); + if (calledViaExternalize) + { + releaseAssert(lcl <= latestQueuedToApply); + releaseAssert(latestQueuedToApply <= latestHeardFromNetwork); + } + + if (lcl == latestQueuedToApply) + { + mCurrentlyApplyingLedger = false; + } + + // Continue execution on the main thread + // if we have closed the latest ledger we have heard of, set state to + // "synced" + bool appliedLatest = false; + + if (latestHeardFromNetwork == lcl) + { + mApp.getLedgerManager().moveToSynced(); + appliedLatest = true; + } + + if (calledViaExternalize) + { + // New ledger(s) got closed, notify Herder + mApp.getHerder().lastClosedLedgerIncreased(appliedLatest, + ledgerData.getTxSet()); + } +} + /* This is the main method that closes the current ledger based on the close context that was computed by SCP or by the historical module @@ -753,8 +797,14 @@ during replays. */ void -LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) +LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) { + if (mApp.isStopping()) + { + return; + } + #ifdef BUILD_TESTS mLastLedgerTxMeta.clear(); #endif @@ -766,9 +816,13 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) LedgerTxn ltx(mApp.getLedgerTxnRoot()); auto header = ltx.loadHeader(); + auto prevHeader = + threadIsMain() ? getLastClosedLedgerHeader().header : header.current(); + auto prevHash = xdrSha256(prevHeader); + auto initialLedgerVers = header.current().ledgerVersion; ++header.current().ledgerSeq; - header.current().previousLedgerHash = mLastClosedLedger.hash; + header.current().previousLedgerHash = prevHash; CLOG_DEBUG(Ledger, "starting closeLedger() on ledgerSeq={}", header.current().ledgerSeq); @@ -776,6 +830,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) auto now = mApp.getClock().now(); mLedgerAgeClosed.Update(now - mLastClose); + // mLastClose is only accessed by a single thread mLastClose = now; mLedgerAge.set_count(0); @@ -793,15 +848,14 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) header.current().ledgerVersion)); } - if (txSet->previousLedgerHash() != getLastClosedLedgerHeader().hash) + if (txSet->previousLedgerHash() != prevHash) { CLOG_ERROR(Ledger, "TxSet mismatch: LCD wants {}, LCL is {}", ledgerAbbrev(ledgerData.getLedgerSeq() - 1, txSet->previousLedgerHash()), - ledgerAbbrev(getLastClosedLedgerHeader())); + ledgerAbbrev(prevHeader)); - CLOG_ERROR(Ledger, "{}", - xdrToCerealString(getLastClosedLedgerHeader(), "Full LCL")); + CLOG_ERROR(Ledger, "{}", xdrToCerealString(prevHeader, "Full LCL")); CLOG_ERROR(Ledger, "{}", POSSIBLY_CORRUPTED_LOCAL_DATA); throw std::runtime_error("txset mismatch"); @@ -845,8 +899,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) { if (mNextMetaToEmit) { - releaseAssert(mNextMetaToEmit->ledgerHeader().hash == - getLastClosedLedgerHeader().hash); + releaseAssert(mNextMetaToEmit->ledgerHeader().hash == prevHash); emitNextMeta(); } releaseAssert(!mNextMetaToEmit); @@ -866,8 +919,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) applicableTxSet->getTxsInApplyOrder(); // first, prefetch source accounts for txset, then charge fees - prefetchTxSourceIds(txs); - + prefetchTxSourceIds(mApp.getLedgerTxnRoot(), txs, mApp.getConfig()); auto const mutableTxResults = processFeesSeqNums( 
txs, ltx, *applicableTxSet, ledgerCloseMeta, ledgerData); @@ -943,10 +995,11 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) updateNetworkConfig(ltx); } - ledgerClosed(ltx, ledgerCloseMeta, initialLedgerVers); + auto closeLedgerResult = + ledgerClosed(ltx, ledgerCloseMeta, initialLedgerVers); if (ledgerData.getExpectedHash() && - *ledgerData.getExpectedHash() != mLastClosedLedger.hash) + *ledgerData.getExpectedHash() != closeLedgerResult.ledgerHeader.hash) { throw std::runtime_error("Local node's ledger corrupted during close"); } @@ -954,7 +1007,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) if (mMetaStream || mMetaDebugStream) { releaseAssert(ledgerCloseMeta); - ledgerCloseMeta->ledgerHeader() = mLastClosedLedger; + ledgerCloseMeta->ledgerHeader() = closeLedgerResult.ledgerHeader; // At this point we've got a complete meta and we can store it to the // member variable: if we throw while committing below, we will at worst @@ -967,7 +1020,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) // This is unfortunate and it would be nice if we could make it not // be so subtle, but for the time being this is where we are. // - // 1. Queue any history-checkpoint to the database, _within_ the current + // 1. Queue any history-checkpoint, _within_ the current // transaction. This way if there's a crash after commit and before // we've published successfully, we'll re-publish on restart. // @@ -978,20 +1031,25 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) // between commit and this step, core will attempt finalizing files again // on restart. // - // 4. Start any queued checkpoint publishing, _after_ the commit so that + // 4. Start background eviction scan for the next ledger, _after_ the commit + // so that it takes its snapshot of network setting from the + // committed state. + // + // 5. Start any queued checkpoint publishing, _after_ the commit so that // it takes its snapshot of history-rows from the committed state, but // _before_ we GC any buckets (because this is the step where the // bucket refcounts are incremented for the duration of the publish). // - // 5. Start background eviction scan for the next ledger, _after_ the commit - // so that it takes its snapshot of network setting from the - // committed state. - // // 6. GC unreferenced buckets. Only do this once publishes are in progress. + // + // 7. Finally, reflect the newly closed ledger in LedgerManager's and Herder's + // states: maybe move into SYNCED state, trigger next ledger, etc. - // step 1 + // Step 1. Maybe queue the current checkpoint file for publishing; this + // should not race with main, since publish on main begins strictly _after_ + // this call.
auto& hm = mApp.getHistoryManager(); - hm.maybeQueueHistoryCheckpoint(); + hm.maybeQueueHistoryCheckpoint(ledgerSeq); // step 2 ltx.commit(); @@ -1001,64 +1059,75 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) #endif // step 3 - hm.maybeCheckpointComplete(); - - // step 4 - hm.publishQueuedHistory(); - hm.logAndUpdatePublishStatus(); + hm.maybeCheckpointComplete(ledgerSeq); - // step 5 + // Step 4 if (protocolVersionStartsFrom(initialLedgerVers, SOROBAN_PROTOCOL_VERSION)) { - mApp.getBucketManager().startBackgroundEvictionScan(ledgerSeq + 1, - initialLedgerVers); + mApp.getBucketManager().startBackgroundEvictionScan( + ledgerSeq + 1, initialLedgerVers, + getSorobanNetworkConfigForApply()); } - // step 6 - mApp.getBucketManager().forgetUnreferencedBuckets(); + // Invoke completion handler on the _main_ thread: kick off publishing, + // cleanup bucket files, notify herder to trigger next ledger + auto completionHandler = + [this, txs, ledgerSeq, calledViaExternalize, ledgerData, + ledgerOutput = std::move(closeLedgerResult)]() mutable { + releaseAssert(threadIsMain()); + updateCurrentLedgerState(ledgerOutput); + + // Step 5. Maybe kick off publishing on complete checkpoint files + auto& hm = mApp.getHistoryManager(); + hm.publishQueuedHistory(); + hm.logAndUpdatePublishStatus(); + + // Step 6. Clean up unreferenced buckets post-apply + { + // Ledger state might be updated at the same time, so protect GC + // call with state mutex + std::lock_guard guard(mLedgerStateMutex); + mApp.getBucketManager().forgetUnreferencedBuckets( + getLastClosedLedgerHAS()); + } + + // Step 7. Maybe set LedgerManager into synced state, maybe let + // Herder trigger next ledger + ledgerCloseComplete(ledgerSeq, calledViaExternalize, ledgerData); + CLOG_INFO(Ledger, "Ledger close complete: {}", ledgerSeq); + }; - if (!mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.empty()) + if (threadIsMain()) { - // Sleep for a parameterized amount of time in simulation mode - std::discrete_distribution distribution( - mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.begin(), - mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.end()); - std::chrono::microseconds sleepFor{0}; - auto txSetSizeOp = applicableTxSet->sizeOpTotal(); - for (size_t i = 0; i < txSetSizeOp; i++) - { - sleepFor += - mApp.getConfig() - .OP_APPLY_SLEEP_TIME_DURATION_FOR_TESTING[distribution( - gRandomEngine)]; - } - std::chrono::microseconds applicationTime = - closeLedgerTime.checkElapsedTime(); - if (applicationTime < sleepFor) - { - sleepFor -= applicationTime; - CLOG_DEBUG(Perf, "Simulate application: sleep for {} microseconds", - sleepFor.count()); - std::this_thread::sleep_for(sleepFor); - } + completionHandler(); + } + else + { + mApp.postOnMainThread(completionHandler, "ledgerCloseComplete"); } + maybeSimulateSleep(mApp.getConfig(), txs.size(), closeLedgerTime); std::chrono::duration ledgerTimeSeconds = ledgerTime.Stop(); - CLOG_DEBUG(Perf, "Applied ledger in {} seconds", ledgerTimeSeconds.count()); + CLOG_DEBUG(Perf, "Applied ledger {} in {} seconds", ledgerSeq, + ledgerTimeSeconds.count()); FrameMark; } - void LedgerManagerImpl::deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - soci::transaction txscope(db.getSession()); - db.clearPreparedStatementCache(); - LedgerHeaderUtils::deleteOldEntries(db, ledgerSeq, count); - HerderPersistence::deleteOldEntries(db, ledgerSeq, count); - db.clearPreparedStatementCache(); - txscope.commit(); + if 
(mApp.getConfig().parallelLedgerClose()) + { + auto session = + std::make_unique(mApp.getDatabase().getPool()); + LedgerHeaderUtils::deleteOldEntries(*session, ledgerSeq, count); + } + else + { + LedgerHeaderUtils::deleteOldEntries(db.getRawSession(), ledgerSeq, + count); + } } void @@ -1066,15 +1135,17 @@ LedgerManagerImpl::setLastClosedLedger( LedgerHeaderHistoryEntry const& lastClosed, bool storeInDB) { ZoneScoped; + releaseAssert(threadIsMain()); LedgerTxn ltx(mApp.getLedgerTxnRoot()); auto header = ltx.loadHeader(); header.current() = lastClosed.header; - storeCurrentLedger(header.current(), storeInDB, - /* appendToCheckpoint */ false); + auto has = storeCurrentLedger(header.current(), storeInDB, + /* appendToCheckpoint */ false); ltx.commit(); mRebuildInMemoryState = false; - advanceLedgerPointers(lastClosed.header); + updateCurrentLedgerState(advanceLedgerPointers(lastClosed.header, has)); + LedgerTxn ltx2(mApp.getLedgerTxnRoot()); if (protocolVersionStartsFrom(ltx2.loadHeader().current().ledgerVersion, SOROBAN_PROTOCOL_VERSION)) @@ -1092,7 +1163,12 @@ LedgerManagerImpl::manuallyAdvanceLedgerHeader(LedgerHeader const& header) "May only manually advance ledger header sequence number with " "MANUAL_CLOSE and RUN_STANDALONE"); } - advanceLedgerPointers(header, false); + HistoryArchiveState has; + has.fromString(mApp.getPersistentState().getState( + PersistentState::kHistoryArchiveState, + mApp.getDatabase().getSession())); + auto output = advanceLedgerPointers(header, has, false); + updateCurrentLedgerState(output); } void @@ -1109,9 +1185,9 @@ LedgerManagerImpl::setupLedgerCloseMetaStream() { // We can't be sure we're writing to a stream that supports fsync; // pipes typically error when you try. So we don't do it. - mMetaStream = std::make_unique( - mApp.getClock().getIOContext(), - /*fsyncOnClose=*/false); + mMetaStream = + std::make_unique(getMetaIOContext(mApp), + /*fsyncOnClose=*/false); std::regex fdrx("^fd:([0-9]+)$"); std::smatch sm; if (std::regex_match(cfg.METADATA_OUTPUT_STREAM, sm, fdrx)) @@ -1176,9 +1252,9 @@ LedgerManagerImpl::maybeResetLedgerCloseMetaDebugStream(uint32_t ledgerSeq) // such stream or a replacement for the one we just handed off to // flush-and-rotate. Either way, we should not have an existing one! 
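The ledger-close path above finishes by handing an immutable result back to the main thread: when the close already ran on main the handler is invoked inline, otherwise it is posted to the main thread, which then installs the new LCL, kicks off publishing, and lets Herder trigger the next ledger. A standard-library-only sketch of that hand-off, with a toy queue standing in for postOnMainThread and a hypothetical CloseOutput type:

    #include <cstdint>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <string>
    #include <thread>

    // Toy stand-in for "post this closure back to the main thread".
    class MainThreadQueue
    {
        std::mutex mMutex;
        std::queue<std::function<void()>> mWork;

      public:
        void
        post(std::function<void()> f)
        {
            std::lock_guard<std::mutex> lock(mMutex);
            mWork.push(std::move(f));
        }

        // Called only from the main thread.
        void
        drain()
        {
            for (;;)
            {
                std::function<void()> f;
                {
                    std::lock_guard<std::mutex> lock(mMutex);
                    if (mWork.empty())
                    {
                        return;
                    }
                    f = std::move(mWork.front());
                    mWork.pop();
                }
                f();
            }
        }
    };

    // Hypothetical immutable result of closing one ledger.
    struct CloseOutput
    {
        uint32_t ledgerSeq;
        std::string headerHash;
    };

    int
    main()
    {
        MainThreadQueue mainQueue;
        std::string lcl = "none";

        // "Apply thread": does the heavy work, then hands the result to main.
        std::thread applyThread([&]() {
            CloseOutput out{7, "deadbeef"};
            mainQueue.post([&lcl, out]() {
                // Runs on the main thread: install the new LCL, kick off
                // publishing, notify Herder to trigger the next ledger, etc.
                lcl = out.headerHash;
            });
        });
        applyThread.join();

        // Main thread picks up and runs queued completion handlers.
        mainQueue.drain();
        return 0;
    }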
releaseAssert(!mMetaDebugStream); - auto tmpStream = std::make_unique( - mApp.getClock().getIOContext(), - /*fsyncOnClose=*/true); + auto tmpStream = + std::make_unique(getMetaIOContext(mApp), + /*fsyncOnClose=*/true); auto metaDebugPath = metautils::getMetaDebugFilePath( mApp.getBucketManager().getBucketDir(), ledgerSeq); @@ -1239,23 +1315,32 @@ LedgerManagerImpl::getCurrentLedgerStateSnaphot() } void +LedgerManagerImpl::updateCurrentLedgerState(CloseLedgerOutput const& output) +{ + releaseAssert(threadIsMain()); + CLOG_DEBUG( + Ledger, "Advancing LCL: {} -> {}", ledgerAbbrev(mLastClosedLedger), + ledgerAbbrev(output.ledgerHeader.header, output.ledgerHeader.hash)); + + // Update ledger state as seen by the main thread + mLastClosedLedger = output.ledgerHeader; + mLastClosedLedgerHAS = output.has; + mSorobanNetworkConfigReadOnly = output.sorobanConfig; + mReadOnlyLedgerStateSnapshot = output.snapshot; +} + +LedgerManagerImpl::CloseLedgerOutput LedgerManagerImpl::advanceLedgerPointers(LedgerHeader const& header, + HistoryArchiveState const& has, bool debugLog) { auto ledgerHash = xdrSha256(header); - if (debugLog) - { - CLOG_DEBUG(Ledger, "Advancing LCL: {} -> {}", - ledgerAbbrev(mLastClosedLedger), - ledgerAbbrev(header, ledgerHash)); - } - - // NB: with parallel ledger close, this will have to be called strictly from - // the main thread, - mLastClosedLedger.hash = ledgerHash; - mLastClosedLedger.header = header; - mSorobanNetworkConfigReadOnly = mSorobanNetworkConfigForApply; + CloseLedgerOutput res; + res.ledgerHeader.hash = ledgerHash; + res.ledgerHeader.header = header; + res.has = has; + res.sorobanConfig = mSorobanNetworkConfigForApply; auto& bm = mApp.getBucketManager(); auto liveSnapshot = std::make_unique>( @@ -1263,22 +1348,20 @@ LedgerManagerImpl::advanceLedgerPointers(LedgerHeader const& header, auto hotArchiveSnapshot = std::make_unique>( bm.getHotArchiveBucketList(), header); + // Updating BL snapshot is thread-safe bm.getBucketSnapshotManager().updateCurrentSnapshot( std::move(liveSnapshot), std::move(hotArchiveSnapshot)); - // NB: with parallel ledger close, this will have to be called strictly from - // the main thread, - mReadOnlyLedgerStateSnapshot = + res.snapshot = bm.getBucketSnapshotManager().copySearchableLiveBucketListSnapshot(); + return res; } void -LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& rootLtx) +LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& ltx) { ZoneScoped; - releaseAssert(threadIsMain()); - - uint32_t ledgerVersion = rootLtx.loadHeader().current().ledgerVersion; + uint32_t ledgerVersion = ltx.loadHeader().current().ledgerVersion; if (protocolVersionStartsFrom(ledgerVersion, SOROBAN_PROTOCOL_VERSION)) { @@ -1288,7 +1371,7 @@ LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& rootLtx) std::make_shared(); } mSorobanNetworkConfigForApply->loadFromLedger( - rootLtx, mApp.getConfig().CURRENT_LEDGER_PROTOCOL_VERSION, + ltx, mApp.getConfig().CURRENT_LEDGER_PROTOCOL_VERSION, ledgerVersion); publishSorobanMetrics(); } @@ -1338,7 +1421,6 @@ LedgerManagerImpl::processFeesSeqNums( auto expectedResults = ledgerData.getExpectedResults(); if (expectedResults) { - releaseAssert(mApp.getCatchupManager().isCatchupInitialized()); expectedResultsIter = std::make_optional(expectedResults->results.begin()); } @@ -1436,26 +1518,28 @@ LedgerManagerImpl::processFeesSeqNums( void LedgerManagerImpl::prefetchTxSourceIds( - std::vector const& txs) + AbstractLedgerTxnParent& ltx, + std::vector const& txs, Config const& config) { ZoneScoped; - if 
(mApp.getConfig().PREFETCH_BATCH_SIZE > 0) + if (config.PREFETCH_BATCH_SIZE > 0) { UnorderedSet keys; for (auto const& tx : txs) { tx->insertKeysForFeeProcessing(keys); } - mApp.getLedgerTxnRoot().prefetchClassic(keys); + ltx.prefetchClassic(keys); } } void LedgerManagerImpl::prefetchTransactionData( - std::vector const& txs) + AbstractLedgerTxnParent& ltx, + std::vector const& txs, Config const& config) { ZoneScoped; - if (mApp.getConfig().PREFETCH_BATCH_SIZE > 0) + if (config.PREFETCH_BATCH_SIZE > 0) { UnorderedSet sorobanKeys; auto lkMeter = make_unique(); @@ -1475,10 +1559,10 @@ LedgerManagerImpl::prefetchTransactionData( // into the performance of each mode. if (!sorobanKeys.empty()) { - mApp.getLedgerTxnRoot().prefetchSoroban(sorobanKeys, lkMeter.get()); + ltx.prefetchSoroban(sorobanKeys, lkMeter.get()); } - mApp.getLedgerTxnRoot().prefetchClassic(classicKeys); + ltx.prefetchClassic(classicKeys); } } @@ -1508,7 +1592,7 @@ LedgerManagerImpl::applyTransactions( ltx.loadHeader().current().ledgerSeq, txSet.summary()); } - prefetchTransactionData(txs); + prefetchTransactionData(mApp.getLedgerTxnRoot(), txs, mApp.getConfig()); Hash sorobanBasePrngSeed = txSet.getContentsHash(); uint64_t txNum{0}; @@ -1605,7 +1689,7 @@ LedgerManagerImpl::logTxApplyMetrics(AbstractLedgerTxn& ltx, size_t numTxs, TracyPlot("ledger.prefetch.hit-rate", hitRate); } -void +HistoryArchiveState LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, bool storeHeader, bool appendToCheckpoint) { @@ -1613,8 +1697,16 @@ LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, Hash hash = xdrSha256(header); releaseAssert(!isZero(hash)); + auto& sess = mApp.getLedgerTxnRoot().getSession(); mApp.getPersistentState().setState(PersistentState::kLastClosedLedger, - binToHex(hash)); + binToHex(hash), sess); + + if (mApp.getConfig().ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING.count() > + 0) + { + std::this_thread::sleep_for( + mApp.getConfig().ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING); + } LiveBucketList bl; if (mApp.getConfig().MODE_ENABLES_BUCKETLIST) @@ -1627,16 +1719,18 @@ LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, mApp.getConfig().NETWORK_PASSPHRASE); mApp.getPersistentState().setState(PersistentState::kHistoryArchiveState, - has.toString()); + has.toString(), sess); if (mApp.getConfig().MODE_STORES_HISTORY_LEDGERHEADERS && storeHeader) { - LedgerHeaderUtils::storeInDatabase(mApp.getDatabase(), header); + LedgerHeaderUtils::storeInDatabase(mApp.getDatabase(), header, sess); if (appendToCheckpoint) { mApp.getHistoryManager().appendLedgerHeader(header); } } + + return has; } // NB: This is a separate method so a testing subclass can override it. 
@@ -1647,6 +1741,7 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( LedgerHeader lh, uint32_t initialLedgerVers) { ZoneScoped; + // `ledgerClosed` protects this call with a mutex std::vector initEntries, liveEntries; std::vector deadEntries; auto blEnabled = mApp.getConfig().MODE_ENABLES_BUCKETLIST; @@ -1665,7 +1760,8 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( auto evictedState = mApp.getBucketManager().resolveBackgroundEvictionScan( - ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers); + ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers, + *mSorobanNetworkConfigForApply); if (protocolVersionStartsFrom( initialLedgerVers, @@ -1695,13 +1791,14 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( } } -void +LedgerManagerImpl::CloseLedgerOutput LedgerManagerImpl::ledgerClosed( AbstractLedgerTxn& ltx, std::unique_ptr const& ledgerCloseMeta, uint32_t initialLedgerVers) { ZoneScoped; + std::lock_guard guard(mLedgerStateMutex); auto ledgerSeq = ltx.loadHeader().current().ledgerSeq; auto currLedgerVers = ltx.loadHeader().current().ledgerVersion; CLOG_TRACE(Ledger, @@ -1733,15 +1830,18 @@ LedgerManagerImpl::ledgerClosed( protocolVersionStartsFrom(initialLedgerVers, SOROBAN_PROTOCOL_VERSION)) { ledgerCloseMeta->setNetworkConfiguration( - getSorobanNetworkConfigReadOnly(), + getSorobanNetworkConfigForApply(), mApp.getConfig().EMIT_LEDGER_CLOSE_META_EXT_V1); } - ltx.unsealHeader([this](LedgerHeader& lh) { + CloseLedgerOutput res; + ltx.unsealHeader([this, &res](LedgerHeader& lh) { mApp.getBucketManager().snapshotLedger(lh); - storeCurrentLedger(lh, /* storeHeader */ true, - /* appendToCheckpoint */ true); - advanceLedgerPointers(lh); + auto has = storeCurrentLedger(lh, /* storeHeader */ true, + /* appendToCheckpoint */ true); + res = advanceLedgerPointers(lh, has); }); + + return res; } } diff --git a/src/ledger/LedgerManagerImpl.h b/src/ledger/LedgerManagerImpl.h index 985aaed877..20eaae711f 100644 --- a/src/ledger/LedgerManagerImpl.h +++ b/src/ledger/LedgerManagerImpl.h @@ -42,6 +42,14 @@ class BasicWork; class LedgerManagerImpl : public LedgerManager { protected: + struct CloseLedgerOutput + { + LedgerHeaderHistoryEntry ledgerHeader; + std::shared_ptr sorobanConfig; + HistoryArchiveState has; + std::shared_ptr snapshot; + }; + Application& mApp; std::unique_ptr mMetaStream; std::unique_ptr mMetaDebugStream; @@ -49,6 +57,8 @@ class LedgerManagerImpl : public LedgerManager std::filesystem::path mMetaDebugPath; private: + // Cache LCL state, updates once a ledger (synchronized with + // mLedgerStateMutex) LedgerHeaderHistoryEntry mLastClosedLedger; // Read-only Soroban network configuration, accessible by main thread only. @@ -64,6 +74,7 @@ class LedgerManagerImpl : public LedgerManager // variable is not synchronized, since it should only be used by one thread // (main or ledger close). 
std::shared_ptr mSorobanNetworkConfigForApply; + HistoryArchiveState mLastClosedLedgerHAS; SorobanMetrics mSorobanMetrics; medida::Timer& mTransactionApply; @@ -83,12 +94,16 @@ class LedgerManagerImpl : public LedgerManager bool mRebuildInMemoryState{false}; SearchableSnapshotConstPtr mReadOnlyLedgerStateSnapshot; - std::unique_ptr mStartCatchup; + // Use mutex to guard read access to LCL and Soroban network config + mutable std::recursive_mutex mLedgerStateMutex; + medida::Timer& mCatchupDuration; std::unique_ptr mNextMetaToEmit; - std::vector processFeesSeqNums( + bool mCurrentlyApplyingLedger{false}; + + static std::vector processFeesSeqNums( std::vector const& txs, AbstractLedgerTxn& ltxOuter, ApplicableTxSetFrame const& txSet, std::unique_ptr const& ledgerCloseMeta, @@ -104,17 +119,22 @@ class LedgerManagerImpl : public LedgerManager // initialLedgerVers must be the ledger version at the start of the ledger. // On the ledger in which a protocol upgrade from vN to vN + 1 occurs, // initialLedgerVers must be vN. - void + CloseLedgerOutput ledgerClosed(AbstractLedgerTxn& ltx, std::unique_ptr const& ledgerCloseMeta, uint32_t initialLedgerVers); - void storeCurrentLedger(LedgerHeader const& header, bool storeHeader, - bool appendToCheckpoint); - void - prefetchTransactionData(std::vector const& txs); - void prefetchTxSourceIds(std::vector const& txs); - void closeLedgerIf(LedgerCloseData const& ledgerData); + HistoryArchiveState storeCurrentLedger(LedgerHeader const& header, + bool storeHeader, + bool appendToCheckpoint); + static void + prefetchTransactionData(AbstractLedgerTxnParent& rootLtx, + std::vector const& txs, + Config const& config); + static void + prefetchTxSourceIds(AbstractLedgerTxnParent& rootLtx, + std::vector const& txs, + Config const& config); State mState; @@ -130,6 +150,8 @@ class LedgerManagerImpl : public LedgerManager // as the actual ledger usage. void publishSorobanMetrics(); + void updateCurrentLedgerState(CloseLedgerOutput const& output); + protected: // initialLedgerVers must be the ledger version at the start of the ledger // and currLedgerVers is the ledger version in the current ltx header. These @@ -144,8 +166,11 @@ class LedgerManagerImpl : public LedgerManager std::unique_ptr const& ledgerCloseMeta, LedgerHeader lh, uint32_t initialLedgerVers); - void advanceLedgerPointers(LedgerHeader const& header, - bool debugLog = true); + // Update in-memory cached LCL state (this only happens at the end of ledger + // close) + CloseLedgerOutput advanceLedgerPointers(LedgerHeader const& header, + HistoryArchiveState const& has, + bool debugLog = true); void logTxApplyMetrics(AbstractLedgerTxn& ltx, size_t numTxs, size_t numOps); @@ -157,10 +182,12 @@ class LedgerManagerImpl : public LedgerManager // This call is read-only and hence `ltx` can be read-only. 
void updateNetworkConfig(AbstractLedgerTxn& ltx) override; void moveToSynced() override; + void beginApply() override; State getState() const override; std::string getStateHuman() const override; - void valueExternalized(LedgerCloseData const& ledgerData) override; + void valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) override; uint32_t getLastMaxTxSetSize() const override; uint32_t getLastMaxTxSetSizeOps() const override; @@ -201,7 +228,10 @@ class LedgerManagerImpl : public LedgerManager std::shared_ptr archive, std::set> bucketsToRetain) override; - void closeLedger(LedgerCloseData const& ledgerData) override; + void closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) override; + void ledgerCloseComplete(uint32_t lcl, bool calledViaExternalize, + LedgerCloseData const& ledgerData); void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) override; @@ -215,5 +245,10 @@ class LedgerManagerImpl : public LedgerManager SorobanMetrics& getSorobanMetrics() override; SearchableSnapshotConstPtr getCurrentLedgerStateSnaphot() override; + virtual bool + isApplying() const override + { + return mCurrentlyApplyingLedger; + } }; } diff --git a/src/ledger/test/InMemoryLedgerTxnRoot.cpp b/src/ledger/test/InMemoryLedgerTxnRoot.cpp index 75f1bb06c6..d2d2bd4ab1 100644 --- a/src/ledger/test/InMemoryLedgerTxnRoot.cpp +++ b/src/ledger/test/InMemoryLedgerTxnRoot.cpp @@ -140,7 +140,7 @@ void InMemoryLedgerTxnRoot::prepareNewObjects(size_t) SessionWrapper& InMemoryLedgerTxnRoot::getSession() const { - throw std::runtime_error("ERROR!!"); + throw std::runtime_error("called InMemoryLedgerTxnRoot::getSession"); } #ifdef BUILD_TESTS diff --git a/src/main/AppConnector.cpp b/src/main/AppConnector.cpp index d282e7eddd..904d6396f1 100644 --- a/src/main/AppConnector.cpp +++ b/src/main/AppConnector.cpp @@ -53,17 +53,22 @@ AppConnector::getSorobanNetworkConfigReadOnly() const return mApp.getLedgerManager().getSorobanNetworkConfigReadOnly(); } +SorobanNetworkConfig const& +AppConnector::getSorobanNetworkConfigForApply() const +{ + // releaseAssert(!threadIsMain() || !mConfig.parallelLedgerClose()); + return mApp.getLedgerManager().getSorobanNetworkConfigForApply(); +} + medida::MetricsRegistry& AppConnector::getMetrics() const { - releaseAssert(threadIsMain()); return mApp.getMetrics(); } SorobanMetrics& AppConnector::getSorobanMetrics() const { - releaseAssert(threadIsMain()); return mApp.getLedgerManager().getSorobanMetrics(); } @@ -72,7 +77,8 @@ AppConnector::checkOnOperationApply(Operation const& operation, OperationResult const& opres, LedgerTxnDelta const& ltxDelta) { - releaseAssert(threadIsMain()); + // Only one thread can call this method + releaseAssert(threadIsMain() || mConfig.parallelLedgerClose()); mApp.getInvariantManager().checkOnOperationApply(operation, opres, ltxDelta); } @@ -80,7 +86,7 @@ AppConnector::checkOnOperationApply(Operation const& operation, Hash const& AppConnector::getNetworkID() const { - releaseAssert(threadIsMain()); + // NetworkID is a const return mApp.getNetworkID(); } @@ -137,4 +143,11 @@ AppConnector::checkScheduledAndCache( return mApp.getOverlayManager().checkScheduledAndCache(msgTracker); } +LedgerHeaderHistoryEntry +AppConnector::getLastClosedLedgerHeader() const +{ + // LCL is thread-safe (it's a copy) + return mApp.getLedgerManager().getLastClosedLedgerHeader(); +} + } \ No newline at end of file diff --git a/src/main/AppConnector.h b/src/main/AppConnector.h index ec35925133..4f2c565982 100644 --- 
a/src/main/AppConnector.h +++ b/src/main/AppConnector.h @@ -34,8 +34,6 @@ class AppConnector OverlayManager& getOverlayManager(); BanManager& getBanManager(); bool shouldYield() const; - SorobanNetworkConfig const& getSorobanNetworkConfigReadOnly() const; - medida::MetricsRegistry& getMetrics() const; SorobanMetrics& getSorobanMetrics() const; void checkOnOperationApply(Operation const& operation, OperationResult const& opres, @@ -55,5 +53,10 @@ class AppConnector // This method is always exclusively called from one thread bool checkScheduledAndCache(std::shared_ptr msgTracker); + SorobanNetworkConfig const& getSorobanNetworkConfigReadOnly() const; + SorobanNetworkConfig const& getSorobanNetworkConfigForApply() const; + + medida::MetricsRegistry& getMetrics() const; + LedgerHeaderHistoryEntry getLastClosedLedgerHeader() const; }; } \ No newline at end of file diff --git a/src/main/Application.cpp b/src/main/Application.cpp index 991c11f48a..8ff31b3b4f 100644 --- a/src/main/Application.cpp +++ b/src/main/Application.cpp @@ -4,6 +4,7 @@ #include "Application.h" #include "ApplicationImpl.h" +#include "database/Database.h" #include "util/GlobalChecks.h" #include @@ -21,12 +22,13 @@ validateNetworkPassphrase(Application::pointer app) } auto& persistentState = app->getPersistentState(); - std::string prevNetworkPassphrase = - persistentState.getState(PersistentState::kNetworkPassphrase); + std::string prevNetworkPassphrase = persistentState.getState( + PersistentState::kNetworkPassphrase, app->getDatabase().getSession()); if (prevNetworkPassphrase.empty()) { persistentState.setState(PersistentState::kNetworkPassphrase, - networkPassphrase); + networkPassphrase, + app->getDatabase().getSession()); } else if (networkPassphrase != prevNetworkPassphrase) { diff --git a/src/main/Application.h b/src/main/Application.h index ae23517a57..ccaf4af374 100644 --- a/src/main/Application.h +++ b/src/main/Application.h @@ -229,6 +229,7 @@ class Application virtual asio::io_context& getWorkerIOContext() = 0; virtual asio::io_context& getEvictionIOContext() = 0; virtual asio::io_context& getOverlayIOContext() = 0; + virtual asio::io_context& getLedgerCloseIOContext() = 0; virtual void postOnMainThread( std::function&& f, std::string&& name, @@ -242,6 +243,8 @@ class Application std::string jobName) = 0; virtual void postOnOverlayThread(std::function&& f, std::string jobName) = 0; + virtual void postOnLedgerCloseThread(std::function&& f, + std::string jobName) = 0; // Perform actions necessary to transition from BOOTING_STATE to other // states. In particular: either reload or reinitialize the database, and diff --git a/src/main/ApplicationImpl.cpp b/src/main/ApplicationImpl.cpp index 482ceef177..022f09785f 100644 --- a/src/main/ApplicationImpl.cpp +++ b/src/main/ApplicationImpl.cpp @@ -92,6 +92,13 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg) , mOverlayWork(mOverlayIOContext ? std::make_unique( *mOverlayIOContext) : nullptr) + , mLedgerCloseIOContext(mConfig.parallelLedgerClose() + ? std::make_unique(1) + : nullptr) + , mLedgerCloseWork( + mLedgerCloseIOContext + ? 
std::make_unique(*mLedgerCloseIOContext) + : nullptr) , mWorkerThreads() , mEvictionThread() , mStopSignals(clock.getIOContext(), SIGINT) @@ -107,6 +114,8 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg) mMetrics->NewTimer({"app", "post-on-background-thread", "delay"})) , mPostOnOverlayThreadDelay( mMetrics->NewTimer({"app", "post-on-overlay-thread", "delay"})) + , mPostOnLedgerCloseThreadDelay( + mMetrics->NewTimer({"app", "post-on-ledger-close-thread", "delay"})) , mStartedOn(clock.system_now()) { #ifdef SIGQUIT @@ -173,6 +182,12 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg) // Keep priority unchanged as overlay processes time-sensitive tasks mOverlayThread = std::thread{[this]() { mOverlayIOContext->run(); }}; } + + if (mConfig.parallelLedgerClose()) + { + mLedgerCloseThread = + std::thread{[this]() { mLedgerCloseIOContext->run(); }}; + } } static void @@ -182,7 +197,7 @@ maybeRebuildLedger(Application& app, bool applyBuckets) if (ps.shouldRebuildForOfferTable()) { app.getDatabase().clearPreparedStatementCache(); - soci::transaction tx(app.getDatabase().getSession()); + soci::transaction tx(app.getDatabase().getRawSession()); LOG_INFO(DEFAULT_LOG, "Dropping offers"); app.getLedgerTxnRoot().dropOffers(); tx.commit(); @@ -536,10 +551,11 @@ ApplicationImpl::scheduleSelfCheck(bool waitUntilNextCheckpoint) { // Delay until a second full checkpoint-period after the next checkpoint // publication. The captured lhhe should usually be published by then. - auto& hm = getHistoryManager(); auto targetLedger = - hm.firstLedgerAfterCheckpointContaining(lhhe.header.ledgerSeq); - targetLedger = hm.firstLedgerAfterCheckpointContaining(targetLedger); + HistoryManager::firstLedgerAfterCheckpointContaining( + lhhe.header.ledgerSeq, getConfig()); + targetLedger = HistoryManager::firstLedgerAfterCheckpointContaining( + targetLedger, getConfig()); auto cond = [targetLedger](Application& app) -> bool { auto& lm = app.getLedgerManager(); return lm.getLastClosedLedgerNum() > targetLedger; @@ -571,8 +587,13 @@ ApplicationImpl::getNetworkID() const ApplicationImpl::~ApplicationImpl() { LOG_INFO(DEFAULT_LOG, "Application destructing"); + mStopping = true; try { + // First, shutdown ledger close queue _before_ shutting down all the + // subsystems This ensures that any ledger currently being closed + // finishes okay + shutdownLedgerCloseThread(); shutdownWorkScheduler(); if (mProcessManager) { @@ -773,6 +794,7 @@ ApplicationImpl::gracefulStop() return; } mStopping = true; + shutdownLedgerCloseThread(); if (mOverlayManager) { mOverlayManager->shutdown(); @@ -788,7 +810,8 @@ ApplicationImpl::gracefulStop() // This call happens in shutdown -- before destruction -- so that we can // be sure other subsystems (ledger etc.) are still alive and we can // call into them to figure out which buckets _are_ referenced. 
- mBucketManager->forgetUnreferencedBuckets(); + mBucketManager->forgetUnreferencedBuckets( + mLedgerManager->getLastClosedLedgerHAS()); mBucketManager->shutdown(); } if (mHerder) @@ -819,6 +842,21 @@ ApplicationImpl::shutdownWorkScheduler() } } +void +ApplicationImpl::shutdownLedgerCloseThread() +{ + if (mLedgerCloseThread && !mLedgerCloseThreadStopped) + { + if (mLedgerCloseWork) + { + mLedgerCloseWork.reset(); + } + LOG_INFO(DEFAULT_LOG, "Joining the ledger close thread"); + mLedgerCloseThread->join(); + mLedgerCloseThreadStopped = true; + } +} + void ApplicationImpl::joinAllThreads() { @@ -833,6 +871,10 @@ ApplicationImpl::joinAllThreads() { mOverlayWork.reset(); } + if (mEvictionWork) + { + mEvictionWork.reset(); + } LOG_INFO(DEFAULT_LOG, "Joining {} worker threads", mWorkerThreads.size()); for (auto& w : mWorkerThreads) @@ -840,9 +882,10 @@ ApplicationImpl::joinAllThreads() w.join(); } - if (mEvictionWork) + if (mOverlayThread) { - mEvictionWork.reset(); + LOG_INFO(DEFAULT_LOG, "Joining the overlay thread"); + mOverlayThread->join(); } if (mEvictionThread) @@ -851,12 +894,6 @@ ApplicationImpl::joinAllThreads() mEvictionThread->join(); } - if (mOverlayThread) - { - LOG_INFO(DEFAULT_LOG, "Joining the overlay thread"); - mOverlayThread->join(); - } - LOG_INFO(DEFAULT_LOG, "Joined all {} threads", (mWorkerThreads.size() + 1)); } @@ -1345,6 +1382,13 @@ ApplicationImpl::getOverlayIOContext() return *mOverlayIOContext; } +asio::io_context& +ApplicationImpl::getLedgerCloseIOContext() +{ + releaseAssert(mLedgerCloseIOContext); + return *mLedgerCloseIOContext; +} + void ApplicationImpl::postOnMainThread(std::function&& f, std::string&& name, Scheduler::ActionType type) @@ -1402,6 +1446,19 @@ ApplicationImpl::postOnOverlayThread(std::function&& f, }); } +void +ApplicationImpl::postOnLedgerCloseThread(std::function&& f, + std::string jobName) +{ + releaseAssert(mLedgerCloseIOContext); + LogSlowExecution isSlow{std::move(jobName), LogSlowExecution::Mode::MANUAL, + "executed after"}; + asio::post(*mLedgerCloseIOContext, [this, f = std::move(f), isSlow]() { + mPostOnLedgerCloseThreadDelay.Update(isSlow.checkElapsedTime()); + f(); + }); +} + void ApplicationImpl::enableInvariantsFromConfig() { @@ -1444,8 +1501,6 @@ ApplicationImpl::createDatabase() AbstractLedgerTxnParent& ApplicationImpl::getLedgerTxnRoot() { - releaseAssert(threadIsMain()); - #ifdef BUILD_TESTS if (mConfig.MODE_USES_IN_MEMORY_LEDGER) { diff --git a/src/main/ApplicationImpl.h b/src/main/ApplicationImpl.h index 1fc7ea989c..b37093c831 100644 --- a/src/main/ApplicationImpl.h +++ b/src/main/ApplicationImpl.h @@ -82,6 +82,7 @@ class ApplicationImpl : public Application virtual asio::io_context& getWorkerIOContext() override; virtual asio::io_context& getEvictionIOContext() override; virtual asio::io_context& getOverlayIOContext() override; + virtual asio::io_context& getLedgerCloseIOContext() override; virtual void postOnMainThread(std::function&& f, std::string&& name, Scheduler::ActionType type) override; @@ -92,6 +93,8 @@ class ApplicationImpl : public Application virtual void postOnOverlayThread(std::function&& f, std::string jobName) override; + virtual void postOnLedgerCloseThread(std::function&& f, + std::string jobName) override; virtual void start() override; void startServices(); @@ -158,6 +161,9 @@ class ApplicationImpl : public Application std::unique_ptr mOverlayIOContext; std::unique_ptr mOverlayWork; + std::unique_ptr mLedgerCloseIOContext; + std::unique_ptr mLedgerCloseWork; + std::unique_ptr mBucketManager; 
std::unique_ptr mDatabase; std::unique_ptr mOverlayManager; @@ -206,6 +212,7 @@ class ApplicationImpl : public Application std::vector mWorkerThreads; std::optional mOverlayThread; + std::optional mLedgerCloseThread; // Unlike mWorkerThreads (which are low priority), eviction scans require a // medium priority thread. In the future, this may become a more general @@ -216,7 +223,8 @@ class ApplicationImpl : public Application asio::signal_set mStopSignals; bool mStarted; - bool mStopping; + std::atomic mStopping; + bool mLedgerCloseThreadStopped{false}; VirtualTimer mStoppingTimer; VirtualTimer mSelfCheckTimer; @@ -225,6 +233,7 @@ class ApplicationImpl : public Application medida::Timer& mPostOnMainThreadDelay; medida::Timer& mPostOnBackgroundThreadDelay; medida::Timer& mPostOnOverlayThreadDelay; + medida::Timer& mPostOnLedgerCloseThreadDelay; VirtualClock::system_time_point mStartedOn; @@ -258,5 +267,6 @@ class ApplicationImpl : public Application void upgradeToCurrentSchemaAndMaybeRebuildLedger(bool applyBuckets, bool forceRebuild); + void shutdownLedgerCloseThread(); }; } diff --git a/src/main/ApplicationUtils.cpp b/src/main/ApplicationUtils.cpp index 6898facd6e..81c0cacc13 100644 --- a/src/main/ApplicationUtils.cpp +++ b/src/main/ApplicationUtils.cpp @@ -166,8 +166,8 @@ bool applyBucketsForLCL(Application& app) { auto has = app.getLedgerManager().getLastClosedLedgerHAS(); - auto lclHash = - app.getPersistentState().getState(PersistentState::kLastClosedLedger); + auto lclHash = app.getPersistentState().getState( + PersistentState::kLastClosedLedger, app.getDatabase().getSession()); auto maxProtocolVersion = app.getConfig().LEDGER_PROTOCOL_VERSION; auto currentLedger = @@ -240,10 +240,9 @@ setAuthenticatedLedgerHashPair(Application::pointer app, uint32_t startLedger, std::string startHash) { auto const& lm = app->getLedgerManager(); - auto const& hm = app->getHistoryManager(); auto tryCheckpoint = [&](uint32_t seq, Hash h) { - if (hm.isLastLedgerInCheckpoint(seq)) + if (HistoryManager::isLastLedgerInCheckpoint(seq, app->getConfig())) { LOG_INFO(DEFAULT_LOG, "Found authenticated checkpoint hash {} for ledger {}", @@ -272,7 +271,7 @@ setAuthenticatedLedgerHashPair(Application::pointer app, if (lm.isSynced()) { - auto const& lhe = lm.getLastClosedLedgerHeader(); + auto lhe = lm.getLastClosedLedgerHeader(); tryCheckpoint(lhe.header.ledgerSeq, lhe.hash); } else @@ -307,7 +306,8 @@ selfCheck(Config cfg) // Then we scan all the buckets to check they have expected hashes. LOG_INFO(DEFAULT_LOG, "Self-check phase 2: bucket hash verification"); - auto seq2 = app->getBucketManager().scheduleVerifyReferencedBucketsWork(); + auto seq2 = app->getBucketManager().scheduleVerifyReferencedBucketsWork( + app->getLedgerManager().getLastClosedLedgerHAS()); while (clock.crank(true) && !seq2->isDone()) ; @@ -965,18 +965,20 @@ publish(Application::pointer app) asio::io_context::work mainWork(io); auto lcl = app->getLedgerManager().getLastClosedLedgerNum(); - auto isCheckpoint = app->getHistoryManager().isLastLedgerInCheckpoint(lcl); + auto isCheckpoint = + HistoryManager::isLastLedgerInCheckpoint(lcl, app->getConfig()); size_t expectedPublishQueueSize = isCheckpoint ? 
1 : 0; app->getHistoryManager().publishQueuedHistory(); - while (app->getHistoryManager().publishQueueLength() != + while (HistoryManager::publishQueueLength(app->getConfig()) != expectedPublishQueueSize && clock.crank(true)) { } // Cleanup buckets not referenced by publish queue anymore - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); LOG_INFO(DEFAULT_LOG, "*"); LOG_INFO(DEFAULT_LOG, "* Publish finished."); diff --git a/src/main/Maintainer.cpp b/src/main/Maintainer.cpp index 0b11d7658b..3130bc202a 100644 --- a/src/main/Maintainer.cpp +++ b/src/main/Maintainer.cpp @@ -76,14 +76,14 @@ Maintainer::performMaintenance(uint32_t count) // Calculate the minimum of the LCL and/or any queued checkpoint. uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); - uint32_t ql = mApp.getHistoryManager().getMinLedgerQueuedToPublish(); + uint32_t ql = HistoryManager::getMinLedgerQueuedToPublish(mApp.getConfig()); uint32_t qmin = ql == 0 ? lcl : std::min(ql, lcl); // Next calculate, given qmin, the first ledger it'd be _safe to // delete_ while still keeping everything required to publish. // So if qmin is (for example) 0x7f = 127, then we want to keep 64 // ledgers before that, and therefore can erase 0x3f = 63 and less. - uint32_t freq = mApp.getHistoryManager().getCheckpointFrequency(); + uint32_t freq = HistoryManager::getCheckpointFrequency(mApp.getConfig()); uint32_t lmin = qmin >= freq ? qmin - freq : 0; CLOG_INFO(History, "Trimming history <= ledger {}", lmin); diff --git a/src/main/test/ApplicationUtilsTests.cpp b/src/main/test/ApplicationUtilsTests.cpp index b8a12017ad..93f5616739 100644 --- a/src/main/test/ApplicationUtilsTests.cpp +++ b/src/main/test/ApplicationUtilsTests.cpp @@ -194,7 +194,7 @@ class SimulationHelper return std::make_pair(selectedLedger, selectedHash); } - LedgerHeaderHistoryEntry const& + LedgerHeaderHistoryEntry getMainNodeLCL() { return mSimulation->getNode(mMainNodeID) @@ -202,7 +202,7 @@ class SimulationHelper .getLastClosedLedgerHeader(); } - LedgerHeaderHistoryEntry const& + LedgerHeaderHistoryEntry getTestNodeLCL() { return mSimulation->getNode(mTestNodeID) diff --git a/src/overlay/BanManagerImpl.cpp b/src/overlay/BanManagerImpl.cpp index a433ef94e0..297af877dc 100644 --- a/src/overlay/BanManagerImpl.cpp +++ b/src/overlay/BanManagerImpl.cpp @@ -44,7 +44,8 @@ BanManagerImpl::banNode(NodeID nodeID) { ZoneNamedN(insertBanZone, "insert ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "INSERT INTO ban (nodeid) VALUES(:n)"); + "INSERT INTO ban (nodeid) VALUES(:n)", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::use(nodeIDString)); st.define_and_bind(); @@ -61,7 +62,8 @@ BanManagerImpl::unbanNode(NodeID nodeID) { ZoneNamedN(deleteBanZone, "delete ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "DELETE FROM ban WHERE nodeid = :n;"); + "DELETE FROM ban WHERE nodeid = :n;", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::use(nodeIDString)); st.define_and_bind(); @@ -77,7 +79,8 @@ BanManagerImpl::isBanned(NodeID nodeID) { ZoneNamedN(selectBanZone, "select ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "SELECT count(*) FROM ban WHERE nodeid = :n"); + "SELECT count(*) FROM ban WHERE nodeid = :n", + mApp.getDatabase().getSession()); uint32_t count; auto& st = prep.statement(); st.exchange(soci::into(count)); @@ -96,8 +99,8 @@ 
BanManagerImpl::getBans() std::string nodeIDString; { ZoneNamedN(selectBanZone, "select ban", true); - auto prep = - mApp.getDatabase().getPreparedStatement("SELECT nodeid FROM ban"); + auto prep = mApp.getDatabase().getPreparedStatement( + "SELECT nodeid FROM ban", mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::into(nodeIDString)); st.define_and_bind(); @@ -114,10 +117,10 @@ BanManagerImpl::getBans() void BanManager::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS ban"; + db.getRawSession() << "DROP TABLE IF EXISTS ban"; - db.getSession() << "CREATE TABLE ban (" - "nodeid CHARACTER(56) NOT NULL PRIMARY KEY" - ")"; + db.getRawSession() << "CREATE TABLE ban (" + "nodeid CHARACTER(56) NOT NULL PRIMARY KEY" + ")"; } } diff --git a/src/overlay/PeerManager.cpp b/src/overlay/PeerManager.cpp index 4abfbc946c..a27eaec3ec 100644 --- a/src/overlay/PeerManager.cpp +++ b/src/overlay/PeerManager.cpp @@ -180,7 +180,8 @@ PeerManager::removePeersWithManyFailures(size_t minNumFailures, sql += " AND ip = :ip"; } - auto prep = db.getPreparedStatement(sql); + auto prep = + db.getPreparedStatement(sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(use(minNumFailures)); @@ -237,7 +238,8 @@ PeerManager::load(PeerBareAddress const& address) { auto prep = mApp.getDatabase().getPreparedStatement( "SELECT numfailures, nextattempt, type FROM peers " - "WHERE ip = :v1 AND port = :v2"); + "WHERE ip = :v1 AND port = :v2", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(into(result.mNumFailures)); st.exchange(into(result.mNextAttempt)); @@ -294,7 +296,8 @@ PeerManager::store(PeerBareAddress const& address, PeerRecord const& peerRecord, try { - auto prep = mApp.getDatabase().getPreparedStatement(query); + auto prep = mApp.getDatabase().getPreparedStatement( + query, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(use(peerRecord.mNextAttempt)); st.exchange(use(peerRecord.mNumFailures)); @@ -503,7 +506,8 @@ PeerManager::countPeers(std::string const& where, { std::string sql = "SELECT COUNT(*) FROM peers WHERE " + where; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); bind(st); @@ -533,7 +537,8 @@ PeerManager::loadPeers(size_t limit, size_t offset, std::string const& where, "FROM peers WHERE " + where + " LIMIT :limit OFFSET :offset"; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); bind(st); @@ -570,8 +575,8 @@ PeerManager::loadPeers(size_t limit, size_t offset, std::string const& where, void PeerManager::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS peers;"; - db.getSession() << kSQLCreateStatement; + db.getRawSession() << "DROP TABLE IF EXISTS peers;"; + db.getRawSession() << kSQLCreateStatement; } std::vector> @@ -588,7 +593,8 @@ PeerManager::loadAllPeers() int port; PeerRecord record; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(into(ip)); @@ -621,7 +627,7 @@ void PeerManager::storePeers( std::vector> peers) { - soci::transaction tx(mApp.getDatabase().getSession()); + soci::transaction tx(mApp.getDatabase().getRawSession()); for (auto const& peer 
: peers) { store(peer.first, peer.second, /* inDatabase */ false); diff --git a/src/overlay/test/OverlayManagerTests.cpp b/src/overlay/test/OverlayManagerTests.cpp index 5255100fba..afba967e8b 100644 --- a/src/overlay/test/OverlayManagerTests.cpp +++ b/src/overlay/test/OverlayManagerTests.cpp @@ -165,7 +165,7 @@ class OverlayManagerTests pm.storeConfigPeers(); } - rowset rs = app->getDatabase().getSession().prepare + rowset rs = app->getDatabase().getRawSession().prepare << "SELECT ip,port,type FROM peers ORDER BY ip, port"; auto& ppeers = pm.mConfigurationPreferredPeers; @@ -213,7 +213,7 @@ class OverlayManagerTests pm.mResolvedPeers.wait(); pm.tick(); - rowset rs = app->getDatabase().getSession().prepare + rowset rs = app->getDatabase().getRawSession().prepare << "SELECT ip,port,type FROM peers ORDER BY ip, port"; int found = 0; diff --git a/src/simulation/CoreTests.cpp b/src/simulation/CoreTests.cpp index ab91094f72..db4f8766a1 100644 --- a/src/simulation/CoreTests.cpp +++ b/src/simulation/CoreTests.cpp @@ -691,7 +691,8 @@ TEST_CASE("Bucket list entries vs write throughput", "[scalability][!hide]") batch.GetSnapshot().get99thPercentile(), batch.max(), (double)merges.count(), merges.max(), merges.mean()}); - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } } } diff --git a/src/simulation/LoadGenerator.cpp b/src/simulation/LoadGenerator.cpp index 94063d83a8..949f40b40f 100644 --- a/src/simulation/LoadGenerator.cpp +++ b/src/simulation/LoadGenerator.cpp @@ -1214,6 +1214,13 @@ LoadGenerator::checkAccountSynced(Application& app, bool isCreate) account->getAccountId()); result.push_back(account); } + else if (app.getHerder().sourceAccountPending( + account->getPublicKey())) + { + CLOG_TRACE(LoadGen, "Account {} is pending!", + account->getAccountId()); + result.push_back(account); + } } else if (!reloadRes) { diff --git a/src/simulation/TxGenerator.cpp b/src/simulation/TxGenerator.cpp index 59794f1473..07e2f22108 100644 --- a/src/simulation/TxGenerator.cpp +++ b/src/simulation/TxGenerator.cpp @@ -1016,7 +1016,7 @@ TxGenerator::sorobanRandomUploadResources() // Estimate VM instantiation cost, with some additional buffer to increase // the chance that this instruction count is sufficient. 
- ContractCostParamEntry const& vmInstantiationCosts = + ContractCostParamEntry vmInstantiationCosts = mApp.getLedgerManager() .getSorobanNetworkConfigReadOnly() .cpuCostParams()[VmInstantiation]; diff --git a/src/test/FuzzerImpl.cpp b/src/test/FuzzerImpl.cpp index afa750f206..d8eea5d123 100644 --- a/src/test/FuzzerImpl.cpp +++ b/src/test/FuzzerImpl.cpp @@ -887,7 +887,8 @@ resetTxInternalState(Application& app) app.getLedgerTxnRoot().resetForFuzzer(); app.getInvariantManager().resetForFuzzer(); #endif // BUILD_TESTS - app.getDatabase().clearPreparedStatementCache(); + app.getDatabase().clearPreparedStatementCache( + app.getDatabase().getSession()); } // FuzzTransactionFrame is a specialized TransactionFrame that includes @@ -926,9 +927,10 @@ class FuzzTransactionFrame : public TransactionFrame LedgerSnapshot ltxStmt(ltx); // if any ill-formed Operations, do not attempt transaction application auto isInvalidOperation = [&](auto const& op, auto& opResult) { - return !op->checkValid(app.getAppConnector(), signatureChecker, - ltxStmt, false, opResult, - mTxResult->getSorobanData()); + return !op->checkValid( + app.getAppConnector(), signatureChecker, + app.getAppConnector().getSorobanNetworkConfigReadOnly(), + ltxStmt, false, opResult, mTxResult->getSorobanData()); }; auto const& ops = getOperations(); diff --git a/src/test/TestUtils.cpp b/src/test/TestUtils.cpp index 488d4a00bd..c73139d093 100644 --- a/src/test/TestUtils.cpp +++ b/src/test/TestUtils.cpp @@ -36,6 +36,30 @@ crankFor(VirtualClock& clock, VirtualClock::duration duration) ; } +void +crankUntil(Application::pointer app, std::function const& predicate, + VirtualClock::duration timeout) +{ + crankUntil(*app, predicate, timeout); +} + +void +crankUntil(Application& app, std::function const& predicate, + VirtualClock::duration timeout) +{ + auto start = std::chrono::system_clock::now(); + while (!predicate()) + { + app.getClock().crank(false); + auto current = std::chrono::system_clock::now(); + auto diff = current - start; + if (diff > timeout) + { + break; + } + } +} + void shutdownWorkScheduler(Application& app) { diff --git a/src/test/TestUtils.h b/src/test/TestUtils.h index d0175a4e0e..c54c6cf600 100644 --- a/src/test/TestUtils.h +++ b/src/test/TestUtils.h @@ -23,7 +23,11 @@ namespace testutil { void crankSome(VirtualClock& clock); void crankFor(VirtualClock& clock, VirtualClock::duration duration); - +void crankUntil(Application::pointer app, + std::function const& predicate, + VirtualClock::duration timeout); +void crankUntil(Application& app, std::function const& predicate, + VirtualClock::duration timeout); void shutdownWorkScheduler(Application& app); std::vector getInvalidAssets(SecretKey const& issuer); diff --git a/src/test/TxTests.cpp b/src/test/TxTests.cpp index 25f6fbf8d3..8d2a7c1995 100644 --- a/src/test/TxTests.cpp +++ b/src/test/TxTests.cpp @@ -372,6 +372,9 @@ applyCheck(TransactionTestFramePtr tx, Application& app, bool checkSeqNum) recordOrCheckGlobalTestTxMetadata(tm.getXDR()); } + // TODO: in-memory mode doesn't work with parallel ledger close because + // it manually modifies LedgerTxn without closing a ledger; this results + // in a different ledger header stored inside of LedgerTxn ltx.commit(); return res; @@ -631,15 +634,15 @@ loadAccount(AbstractLedgerTxn& ltx, PublicKey const& k, bool mustExist) bool doesAccountExist(Application& app, PublicKey const& k) { - LedgerTxn ltx(app.getLedgerTxnRoot()); - return (bool)stellar::loadAccountWithoutRecord(ltx, k); + LedgerSnapshot lss(app); + return 
(bool)lss.getAccount(k); } xdr::xvector getAccountSigners(PublicKey const& k, Application& app) { - LedgerTxn ltx(app.getLedgerTxnRoot()); - auto account = stellar::loadAccount(ltx, k); + LedgerSnapshot lss(app); + auto account = lss.getAccount(k); return account.current().data.account().signers; } @@ -699,11 +702,8 @@ transactionFromOperations(Application& app, SecretKey const& from, SequenceNumber seq, const std::vector& ops, uint32_t fee) { - uint32_t ledgerVersion; - { - LedgerTxn ltx(app.getLedgerTxnRoot()); - ledgerVersion = ltx.loadHeader().current().ledgerVersion; - } + auto ledgerVersion = + app.getLedgerManager().getLastClosedLedgerHeader().header.ledgerVersion; if (protocolVersionIsBefore(ledgerVersion, ProtocolVersion::V_13)) { return transactionFromOperationsV0(app, from, seq, ops, fee); diff --git a/src/transactions/ExtendFootprintTTLOpFrame.cpp b/src/transactions/ExtendFootprintTTLOpFrame.cpp index a4b00112be..9df14d4273 100644 --- a/src/transactions/ExtendFootprintTTLOpFrame.cpp +++ b/src/transactions/ExtendFootprintTTLOpFrame.cpp @@ -62,8 +62,7 @@ ExtendFootprintTTLOpFrame::doApply( auto const& resources = mParentTx.sorobanResources(); auto const& footprint = resources.footprint; - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); rust::Vec rustEntryRentChanges; rustEntryRentChanges.reserve(footprint.readOnly.size()); diff --git a/src/transactions/InvokeHostFunctionOpFrame.cpp b/src/transactions/InvokeHostFunctionOpFrame.cpp index 30cadc324c..ae76c16264 100644 --- a/src/transactions/InvokeHostFunctionOpFrame.cpp +++ b/src/transactions/InvokeHostFunctionOpFrame.cpp @@ -332,8 +332,7 @@ InvokeHostFunctionOpFrame::doApply( Config const& appConfig = app.getConfig(); HostFunctionMetrics metrics(app.getSorobanMetrics()); auto timeScope = metrics.getExecTimer(); - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); // Get the entries for the footprint rust::Vec ledgerEntryCxxBufs; diff --git a/src/transactions/OperationFrame.cpp b/src/transactions/OperationFrame.cpp index ff14610986..5108b15106 100644 --- a/src/transactions/OperationFrame.cpp +++ b/src/transactions/OperationFrame.cpp @@ -144,8 +144,11 @@ OperationFrame::apply(AppConnector& app, SignatureChecker& signatureChecker, CLOG_TRACE(Tx, "{}", xdrToCerealString(mOperation, "Operation")); LedgerSnapshot ltxState(ltx); - bool applyRes = - checkValid(app, signatureChecker, ltxState, true, res, sorobanData); + std::optional cfg = + isSoroban() ? 
std::make_optional(app.getSorobanNetworkConfigForApply()) + : std::nullopt; + bool applyRes = checkValid(app, signatureChecker, cfg, ltxState, true, res, + sorobanData); if (applyRes) { applyRes = doApply(app, ltx, sorobanBasePrngSeed, res, sorobanData); @@ -219,6 +222,7 @@ OperationFrame::getSourceID() const bool OperationFrame::checkValid(AppConnector& app, SignatureChecker& signatureChecker, + std::optional const& cfg, LedgerSnapshot const& ls, bool forApply, OperationResult& res, std::shared_ptr sorobanData) const @@ -226,8 +230,8 @@ OperationFrame::checkValid(AppConnector& app, ZoneScoped; bool validationResult = false; auto validate = [this, &res, forApply, &signatureChecker, &app, - &sorobanData, - &validationResult](LedgerSnapshot const& ls) { + &sorobanData, &validationResult, + &cfg](LedgerSnapshot const& ls) { if (!isOpSupported(ls.getLedgerHeader().current())) { res.code(opNOT_SUPPORTED); @@ -262,12 +266,9 @@ OperationFrame::checkValid(AppConnector& app, isSoroban()) { releaseAssertOrThrow(sorobanData); - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); - - validationResult = - doCheckValidForSoroban(sorobanConfig, app.getConfig(), - ledgerVersion, res, *sorobanData); + releaseAssertOrThrow(cfg); + validationResult = doCheckValidForSoroban( + cfg.value(), app.getConfig(), ledgerVersion, res, *sorobanData); } else { diff --git a/src/transactions/OperationFrame.h b/src/transactions/OperationFrame.h index 5cc8aa6641..c260d8f11f 100644 --- a/src/transactions/OperationFrame.h +++ b/src/transactions/OperationFrame.h @@ -74,6 +74,7 @@ class OperationFrame AccountID getSourceID() const; bool checkValid(AppConnector& app, SignatureChecker& signatureChecker, + std::optional const& cfg, LedgerSnapshot const& ls, bool forApply, OperationResult& res, std::shared_ptr sorobanData) const; diff --git a/src/transactions/RestoreFootprintOpFrame.cpp b/src/transactions/RestoreFootprintOpFrame.cpp index 354b894ae5..dc849b1c6f 100644 --- a/src/transactions/RestoreFootprintOpFrame.cpp +++ b/src/transactions/RestoreFootprintOpFrame.cpp @@ -63,8 +63,7 @@ RestoreFootprintOpFrame::doApply( auto const& resources = mParentTx.sorobanResources(); auto const& footprint = resources.footprint; auto ledgerSeq = ltx.loadHeader().current().ledgerSeq; - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); auto const& appConfig = app.getConfig(); auto const& archivalSettings = sorobanConfig.stateArchivalSettings(); diff --git a/src/transactions/TransactionFrame.cpp b/src/transactions/TransactionFrame.cpp index 223817b92c..4d9b2d6ae1 100644 --- a/src/transactions/TransactionFrame.cpp +++ b/src/transactions/TransactionFrame.cpp @@ -1465,8 +1465,8 @@ TransactionFrame::checkValidWithOptionallyChargedFee( auto const& op = mOperations[i]; auto& opResult = txResult->getOpResultAt(i); - if (!op->checkValid(app, signatureChecker, ls, false, opResult, - txResult->getSorobanData())) + if (!op->checkValid(app, signatureChecker, sorobanConfig, ls, false, + opResult, txResult->getSorobanData())) { // it's OK to just fast fail here and not try to call // checkValid on all operations as the resulting object @@ -1719,8 +1719,7 @@ TransactionFrame::applyOperations(SignatureChecker& signatureChecker, // If transaction fails, we don't charge for any // refundable resources. 
auto preApplyFee = computePreApplySorobanResourceFee( - ledgerVersion, - app.getLedgerManager().getSorobanNetworkConfigForApply(), + ledgerVersion, app.getSorobanNetworkConfigForApply(), app.getConfig()); txResult.getSorobanData()->setSorobanFeeRefund( @@ -1838,8 +1837,7 @@ TransactionFrame::apply(AppConnector& app, AbstractLedgerTxn& ltx, SOROBAN_PROTOCOL_VERSION) && isSoroban()) { - sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + sorobanConfig = app.getSorobanNetworkConfigForApply(); sorobanResourceFee = computePreApplySorobanResourceFee( ledgerVersion, *sorobanConfig, app.getConfig()); diff --git a/src/transactions/TransactionSQL.cpp b/src/transactions/TransactionSQL.cpp index c35a725f36..7d68016328 100644 --- a/src/transactions/TransactionSQL.cpp +++ b/src/transactions/TransactionSQL.cpp @@ -348,20 +348,23 @@ void dropSupportTransactionFeeHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txfeehistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txfeehistory"; } void dropSupportTxSetHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txsethistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txsethistory"; } void dropSupportTxHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txhistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txhistory"; } } diff --git a/src/transactions/test/SorobanTxTestUtils.cpp b/src/transactions/test/SorobanTxTestUtils.cpp index f16a7b9b4a..f583a09b09 100644 --- a/src/transactions/test/SorobanTxTestUtils.cpp +++ b/src/transactions/test/SorobanTxTestUtils.cpp @@ -1055,7 +1055,7 @@ SorobanTest::getDummyAccount() SorobanNetworkConfig const& SorobanTest::getNetworkCfg() { - return getApp().getLedgerManager().getSorobanNetworkConfigReadOnly(); + return getApp().getLedgerManager().getMutableSorobanNetworkConfig(); } uint32_t