From a885d8844098295dc82087285e2cdea917cfcf2f Mon Sep 17 00:00:00 2001 From: Cmdv Date: Tue, 3 Dec 2024 22:10:01 +0000 Subject: [PATCH] 1906 - Add cache for address --- cardano-db-sync/src/Cardano/DbSync/Api.hs | 3 +- cardano-db-sync/src/Cardano/DbSync/Cache.hs | 66 ++++++++ .../src/Cardano/DbSync/Cache/Types.hs | 145 +++++++++--------- .../src/Cardano/DbSync/Era/Byron/Genesis.hs | 20 +-- .../src/Cardano/DbSync/Era/Byron/Insert.hs | 17 +- .../src/Cardano/DbSync/Era/Shelley/Genesis.hs | 17 +- .../Cardano/DbSync/Era/Universal/Insert/Tx.hs | 21 +-- 7 files changed, 163 insertions(+), 126 deletions(-) diff --git a/cardano-db-sync/src/Cardano/DbSync/Api.hs b/cardano-db-sync/src/Cardano/DbSync/Api.hs index 02f0b9745..81f259ab5 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Api.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Api.hs @@ -353,7 +353,8 @@ mkSyncEnv trce backend connectionString syncOptions protoInfo nw nwMagic systemS then newEmptyCache CacheCapacity - { cacheCapacityStake = 100000 + { cacheCapacityAddress = 100000 + , cacheCapacityStake = 100000 , cacheCapacityDatum = 250000 , cacheCapacityMultiAsset = 250000 , cacheCapacityTx = 100000 diff --git a/cardano-db-sync/src/Cardano/DbSync/Cache.hs b/cardano-db-sync/src/Cardano/DbSync/Cache.hs index ae0bcc04d..36c8315fd 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Cache.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Cache.hs @@ -18,6 +18,7 @@ module Cardano.DbSync.Cache ( queryPrevBlockWithCache, queryOrInsertStakeAddress, queryOrInsertRewardAccount, + insertAddressUsingCache, insertStakeAddress, queryStakeAddrWithCache, queryTxIdWithCache, @@ -31,6 +32,7 @@ module Cardano.DbSync.Cache ( import Cardano.BM.Trace import qualified Cardano.Db as DB +import qualified Cardano.Db.Schema.Variant.TxOut as V import Cardano.DbSync.Cache.Epoch (rollbackMapEpochInCache) import qualified Cardano.DbSync.Cache.FIFO as FIFO import qualified Cardano.DbSync.Cache.LRU as LRU @@ -253,6 +255,61 @@ queryPoolKeyWithCache cache cacheUA hsh = 
Map.insert hsh phId
       pure $ Right phId
 
+-- | Return the id of the address row for @addrRaw@, inserting @vAdrs@ when the
+-- database has no row for it yet.
+--
+-- Arguments, in order: the cache handle, the 'CacheAction' that decides whether
+-- a database result may be stored in the cache, the raw address bytes used as
+-- the lookup key, and the row to insert when the address is unknown.
+--
+-- With 'NoCache' this is a plain query-then-insert. With 'ActiveCache' the LRU
+-- held in 'cAddress' is consulted first:
+--
+-- * hit: 'hitAddress' is recorded, the LRU returned by 'LRU.lookup' is written
+--   back to 'cAddress', and the database is not touched.
+-- * miss: 'missAddress' is recorded, the database is queried (inserting if
+--   needed) and the id is added to the LRU when 'shouldCache' allows it.
+--
+-- Either way the result is the database id of the address, whether it came from
+-- the cache, an existing row or a fresh insert.
+insertAddressUsingCache ::
+  (MonadBaseControl IO m, MonadIO m) =>
+  CacheStatus ->
+  CacheAction ->
+  ByteString ->
+  V.Address ->
+  ReaderT SqlBackend m V.AddressId
+insertAddressUsingCache cache cacheUA addrRaw vAdrs =
+  case cache of
+    -- Caching disabled: go straight to the database.
+    NoCache -> queryOrInsert
+    ActiveCache ci -> do
+      lru <- liftIO $ readTVarIO (cAddress ci)
+      case LRU.lookup addrRaw lru of
+        -- Hit: count it and store the LRU value that the lookup handed back.
+        Just (cachedId, touched) -> liftIO $ do
+          hitAddress (cStats ci)
+          -- NOTE(review): the snapshot read above and this write are separate
+          -- transactions; presumably there is a single writer -- confirm.
+          atomically $ writeTVar (cAddress ci) touched
+          pure cachedId
+        -- Miss: count it, then fall back to the database.
+        Nothing -> do
+          liftIO $ missAddress (cStats ci)
+          addrId <- queryOrInsert
+          rememberAddress ci addrId
+          pure addrId
+  where
+    -- Look the raw address up in the database; insert 'vAdrs' only when absent.
+    queryOrInsert =
+      DB.queryAddressId addrRaw >>= maybe (DB.insertAddress vAdrs) pure
+
+    -- Add the id to the LRU, but only when the caller's 'CacheAction' permits it.
+    rememberAddress ci addrId =
+      when (shouldCache cacheUA) $
+        liftIO . atomically $
+          modifyTVar (cAddress ci) (LRU.insert addrRaw addrId)
+
 insertPoolKeyWithCache ::
   (MonadBaseControl IO m, MonadIO m) =>
   CacheStatus ->
@@ -535,6 +592,15 @@ missMAssets :: StrictTVar IO CacheStatistics -> IO ()
 missMAssets ref =
   atomically $ modifyTVar ref (\cs -> cs {multiAssetsQueries = 1 + multiAssetsQueries cs})
 
+-- Address
+hitAddress :: StrictTVar IO CacheStatistics -> IO ()
+hitAddress ref =
+  atomically $ modifyTVar ref (\cs -> cs {addressHits = 1 + addressHits cs, addressQueries = 1 + addressQueries cs})
+
+missAddress :: StrictTVar IO CacheStatistics -> IO ()
+missAddress ref =
+  atomically $ modifyTVar ref (\cs -> cs {addressQueries = 1 + addressQueries cs})
+
 -- Blocks
 hitPBlock :: StrictTVar IO CacheStatistics -> IO ()
 hitPBlock ref =
diff --git a/cardano-db-sync/src/Cardano/DbSync/Cache/Types.hs b/cardano-db-sync/src/Cardano/DbSync/Cache/Types.hs
index 187898a6b..9c060f907 100644
--- a/cardano-db-sync/src/Cardano/DbSync/Cache/Types.hs
+++ b/cardano-db-sync/src/Cardano/DbSync/Cache/Types.hs
@@ -31,6 +31,7 @@ module Cardano.DbSync.Cache.Types (
 ) where
 
 import qualified Cardano.Db as DB
+import qualified Cardano.Db.Schema.Variant.TxOut as V
 import Cardano.DbSync.Cache.FIFO (FIFOCache)
 import qualified Cardano.DbSync.Cache.FIFO as FIFO
 import Cardano.DbSync.Cache.LRU (LRUCache)
@@ -82,6 +83,7 @@ data CacheInternal = CacheInternal
   , cPrevBlock :: !(StrictTVar IO (Maybe (DB.BlockId, ByteString)))
   , cStats :: !(StrictTVar IO CacheStatistics)
   , cEpoch :: !(StrictTVar IO CacheEpoch)
+  , cAddress :: !(StrictTVar IO (LRUCache ByteString V.AddressId))
   , cTxIds :: !(StrictTVar IO (FIFOCache (Ledger.TxId StandardCrypto) DB.TxId))
   }
 
@@ -96,13 +98,16 @@ data CacheStatistics = CacheStatistics
   , multiAssetsQueries :: !Word64
   , prevBlockHits :: !Word64
   , prevBlockQueries :: !Word64
+  , addressHits :: !Word64
+  , addressQueries :: !Word64
   , txIdsHits :: !Word64
   , txIdsQueries :: !Word64
   }
 
 -- CacheCapacity is used
to define capacities for different types of cache entries.
 data CacheCapacity = CacheCapacity
-  { cacheCapacityStake :: !Word64
+  { cacheCapacityAddress :: !Word64
+  , cacheCapacityStake :: !Word64
   , cacheCapacityDatum :: !Word64
   , cacheCapacityMultiAsset :: !Word64
   , cacheCapacityTx :: !Word64
@@ -128,7 +133,7 @@ data CacheEpoch = CacheEpoch
   deriving (Show)
 
 textShowStats :: CacheStatus -> IO Text
-textShowStats NoCache = pure "NoCache"
+textShowStats NoCache = pure "No Caches"
 textShowStats (ActiveCache ic) = do
   isCacheOptimised <- readTVarIO $ cIsCacheOptimised ic
   stats <- readTVarIO $ cStats ic
@@ -137,77 +142,77 @@ textShowStats (ActiveCache ic) = do
   datums <- readTVarIO (cDatum ic)
   mAssets <- readTVarIO (cMultiAssets ic)
   txIds <- readTVarIO (cTxIds ic)
+  address <- readTVarIO (cAddress ic)
   pure $
     mconcat
       [ "\nCache Statistics:"
       , "\n Caches Optimised: " <> textShow isCacheOptimised
-      , "\n Stake Addresses: "
-      , "cache sizes: "
-      , textShow (Map.size $ scStableCache stakeHashRaws)
-      , " and "
-      , textShow (LRU.getSize $ scLruCache stakeHashRaws)
-      , if credsQueries stats == 0
-          then ""
-          else ", hit rate: " <> textShow (100 * credsHits stats `div` credsQueries stats) <> "%"
-      , ", hits: "
-      , textShow (credsHits stats)
-      , ", misses: "
-      , textShow (credsQueries stats - credsHits stats)
-      , "\n Pools: "
-      , "cache size: "
-      , textShow (Map.size pools)
-      , if poolsQueries stats == 0
-          then ""
-          else ", hit rate: " <> textShow (100 * poolsHits stats `div` poolsQueries stats) <> "%"
-      , ", hits: "
-      , textShow (poolsHits stats)
-      , ", misses: "
-      , textShow (poolsQueries stats - poolsHits stats)
-      , "\n Datums: "
-      , "cache capacity: "
-      , textShow (LRU.getCapacity datums)
-      , ", cache size: "
-      , textShow (LRU.getSize datums)
-      , if datumQueries stats == 0
-          then ""
-          else ", hit rate: " <> textShow (100 * datumHits stats `div` datumQueries stats) <> "%"
-      , ", hits: "
-      , textShow (datumHits stats)
-      , ", misses: "
-      , textShow (datumQueries stats - datumHits stats)
-      , "\n Multi Assets: "
-      , "cache capacity: "
-      , textShow (LRU.getCapacity mAssets)
-      , ", cache size: "
-      , textShow (LRU.getSize mAssets)
-      , if multiAssetsQueries stats == 0
-          then ""
-          else ", hit rate: " <> textShow (100 * multiAssetsHits stats `div` multiAssetsQueries stats) <> "%"
-      , ", hits: "
-      , textShow (multiAssetsHits stats)
-      , ", misses: "
-      , textShow (multiAssetsQueries stats - multiAssetsHits stats)
-      , "\n Previous Block: "
-      , if prevBlockQueries stats == 0
-          then ""
-          else "hit rate: " <> textShow (100 * prevBlockHits stats `div` prevBlockQueries stats) <> "%"
-      , ", hits: "
-      , textShow (prevBlockHits stats)
-      , ", misses: "
-      , textShow (prevBlockQueries stats - prevBlockHits stats)
-      , "\n TxId: "
-      , "cache size: "
-      , textShow (FIFO.getSize txIds)
-      , ", cache capacity: "
-      , textShow (FIFO.getCapacity txIds)
-      , if txIdsQueries stats == 0
-          then ""
-          else ", hit rate: " <> textShow (100 * txIdsHits stats `div` txIdsQueries stats) <> "%"
-      , ", hits: "
-      , textShow (txIdsHits stats)
-      , ", misses: "
-      , textShow (txIdsQueries stats - txIdsHits stats)
+      , textCacheSection "Stake Addresses" (scLruCache stakeHashRaws) (scStableCache stakeHashRaws) (credsHits stats) (credsQueries stats)
+      , textMapSection "Pools" pools (poolsHits stats) (poolsQueries stats)
+      , textLruSection "Datums" datums (datumHits stats) (datumQueries stats)
+      , textLruSection "Addresses" address (addressHits stats) (addressQueries stats)
+      , textLruSection "Multi Assets" mAssets (multiAssetsHits stats) (multiAssetsQueries stats)
+      , textPrevBlockSection stats
+      , textFifoSection "TxId" txIds (txIdsHits stats) (txIdsQueries stats)
       ]
+  where
+    -- Section helpers: each prints "\n <title>: <sizes>" and then ", " plus the shared counters.
+    textCacheSection title cacheLru cacheStable hits queries =
+      mconcat
+        [ "\n " <> title <> ": "
+        , "cache sizes: "
+        , textShow (Map.size cacheStable)
+        , " and "
+        , textShow (LRU.getSize cacheLru)
+        , ", " <> hitMissStats hits queries
+        ]
+
+    textMapSection title cache hits queries =
+      mconcat
+        [ "\n " <> title <> ": "
+        , "cache size: "
+        , textShow (Map.size cache)
+        , ", " <> hitMissStats hits queries
+        ]
+
+    textLruSection title cache hits queries =
+      mconcat
+        [ "\n " <> title <> ": "
+        , "cache capacity: "
+        , textShow (LRU.getCapacity cache)
+        , ", cache size: "
+        , textShow (LRU.getSize cache)
+        , ", " <> hitMissStats hits queries
+        ]
+
+    textFifoSection title cache hits queries =
+      mconcat
+        [ "\n " <> title <> ": "
+        , "cache size: "
+        , textShow (FIFO.getSize cache)
+        , ", cache capacity: "
+        , textShow (FIFO.getCapacity cache)
+        , ", " <> hitMissStats hits queries
+        ]
+
+    -- No size precedes the counters here, so no ", " separator is emitted.
+    textPrevBlockSection cs =
+      "\n Previous Block: " <> hitMissStats (prevBlockHits cs) (prevBlockQueries cs)
+
+    -- "[hit rate: N%, ]hits: H, misses: M" with no leading separator.
+    hitMissStats hits queries =
+      mconcat
+        [ hitRate hits queries
+        , "hits: "
+        , textShow hits
+        , ", misses: "
+        , textShow (queries - hits)
+        ]
+
+    -- Integer percentage followed by ", "; empty when there were no queries (no division by zero).
+    hitRate hits queries
+      | queries == 0 = ""
+      | otherwise = "hit rate: " <> textShow (100 * hits `div` queries) <> "%, "
 
 useNoCache :: CacheStatus
 useNoCache = NoCache
@@ -218,6 +223,7 @@ newEmptyCache CacheCapacity {..} = liftIO $ do
   cStake <- newTVarIO (StakeCache Map.empty (LRU.empty cacheCapacityStake))
   cPools <- newTVarIO Map.empty
   cDatum <- newTVarIO (LRU.empty cacheCapacityDatum)
+  cAddress <- newTVarIO (LRU.empty cacheCapacityAddress)
   cMultiAssets <- newTVarIO (LRU.empty cacheCapacityMultiAsset)
   cPrevBlock <- newTVarIO Nothing
   cStats <- newTVarIO initCacheStatistics
@@ -234,11 +240,12 @@ newEmptyCache CacheCapacity {..} = liftIO $ do
       , cPrevBlock = cPrevBlock
       , cStats = cStats
       , cEpoch = cEpoch
+      , cAddress = cAddress
       , cTxIds = cTxIds
       }
 
 initCacheStatistics :: CacheStatistics
-initCacheStatistics = CacheStatistics 0 0 0 0 0 0 0 0 0 0 0 0
+initCacheStatistics = CacheStatistics 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 
 initCacheEpoch :: CacheEpoch
 initCacheEpoch = CacheEpoch mempty Nothing
diff --git a/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Genesis.hs b/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Genesis.hs
index 8fcf8993c..493f5f4e5 100644
---
a/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Genesis.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Genesis.hs @@ -21,6 +21,8 @@ import qualified Cardano.Db.Schema.Core.TxOut as C import qualified Cardano.Db.Schema.Variant.TxOut as V import Cardano.DbSync.Api import Cardano.DbSync.Api.Types (SyncEnv (..)) +import Cardano.DbSync.Cache (insertAddressUsingCache) +import Cardano.DbSync.Cache.Types (CacheAction (..)) import Cardano.DbSync.Config.Types import qualified Cardano.DbSync.Era.Byron.Util as Byron import Cardano.DbSync.Era.Util (liftLookupFail) @@ -228,10 +230,12 @@ insertTxOutsByron syncEnv disInOut blkId (address, value) = do DB.TxOutVariantAddress -> do let addrRaw = serialize' address vAddress = mkVAddress addrRaw - addrDetailId <- insertAddress addrRaw vAddress + addrDetailId <- insertAddressUsingCache cache UpdateCache addrRaw vAddress void . DB.insertTxOut $ DB.VTxOutW (mkVTxOut txId addrDetailId) Nothing where + cache = envCache syncEnv + mkVTxOut :: DB.TxId -> V.AddressId -> V.TxOut mkVTxOut txId addrDetailId = V.TxOut @@ -256,19 +260,7 @@ insertTxOutsByron syncEnv disInOut blkId (address, value) = do , V.addressStakeAddressId = Nothing -- Byron does not have a stake address. } - insertAddress :: - (MonadBaseControl IO m, MonadIO m) => - ByteString -> - V.Address -> - ReaderT SqlBackend m V.AddressId - insertAddress addrRaw vAdrs = do - mAddrId <- DB.queryAddressId addrRaw - case mAddrId of - Nothing -> DB.insertAddress vAdrs - -- this address is already in the database, so we can just return the id to be linked to the txOut. 
- Just addrId -> pure addrId - --- ----------------------------------------------------------------------------- +--------------------------------------------------------------------------------- configGenesisHash :: Byron.Config -> ByteString configGenesisHash = diff --git a/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Insert.hs b/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Insert.hs index 90e03c85f..e9934f6da 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Insert.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Era/Byron/Insert.hs @@ -25,11 +25,12 @@ import qualified Cardano.Db.Schema.Variant.TxOut as V import Cardano.DbSync.Api import Cardano.DbSync.Api.Types (InsertOptions (..), SyncEnv (..), SyncOptions (..)) import Cardano.DbSync.Cache ( + insertAddressUsingCache, insertBlockAndCache, queryPrevBlockWithCache, ) import Cardano.DbSync.Cache.Epoch (writeEpochBlockDiffToCache) -import Cardano.DbSync.Cache.Types (CacheStatus (..), EpochBlockDiff (..)) +import Cardano.DbSync.Cache.Types (CacheAction (..), CacheStatus (..), EpochBlockDiff (..)) import qualified Cardano.DbSync.Era.Byron.Util as Byron import Cardano.DbSync.Era.Util (liftLookupFail) import Cardano.DbSync.Error @@ -366,12 +367,14 @@ insertTxOutByron syncEnv _hasConsumed bootStrap txId index txout = , C.txOutValue = DbLovelace (Byron.unsafeGetLovelace $ Byron.txOutValue txout) } DB.TxOutVariantAddress -> do - addrDetailId <- insertAddress + addrDetailId <- insertAddressUsingCache cache UpdateCache addrRaw vAddress void . DB.insertTxOut $ DB.VTxOutW (vTxOut addrDetailId) Nothing where addrRaw :: ByteString addrRaw = serialize' (Byron.txOutAddress txout) + cache = envCache syncEnv + vTxOut :: V.AddressId -> V.TxOut vTxOut addrDetailId = V.TxOut @@ -396,16 +399,6 @@ insertTxOutByron syncEnv _hasConsumed bootStrap txId index txout = , V.addressStakeAddressId = Nothing -- Byron does not have a stake address. 
} - insertAddress :: - (MonadBaseControl IO m, MonadIO m) => - ReaderT SqlBackend m V.AddressId - insertAddress = do - mAddrId <- DB.queryAddressId addrRaw - case mAddrId of - Nothing -> DB.insertAddress vAddress - -- this address is already in the database, so we can just return the id to be linked to the txOut. - Just addrId -> pure addrId - insertTxIn :: (MonadBaseControl IO m, MonadIO m) => Trace IO Text -> diff --git a/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Genesis.hs b/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Genesis.hs index 0dcde23af..0cc49a38e 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Genesis.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Genesis.hs @@ -17,8 +17,8 @@ import qualified Cardano.Db.Schema.Core.TxOut as C import qualified Cardano.Db.Schema.Variant.TxOut as V import Cardano.DbSync.Api import Cardano.DbSync.Api.Types (InsertOptions (..), SyncEnv (..), SyncOptions (..)) -import Cardano.DbSync.Cache (tryUpdateCacheTx) -import Cardano.DbSync.Cache.Types (CacheStatus (..), useNoCache) +import Cardano.DbSync.Cache (insertAddressUsingCache, tryUpdateCacheTx) +import Cardano.DbSync.Cache.Types (CacheAction (..), CacheStatus (..), useNoCache) import qualified Cardano.DbSync.Era.Shelley.Generic.Util as Generic import Cardano.DbSync.Era.Universal.Insert.Certificate (insertDelegation, insertStakeRegistration) import Cardano.DbSync.Era.Universal.Insert.Other (insertStakeAddressRefIfMissing) @@ -267,10 +267,11 @@ insertTxOuts syncEnv trce blkId (TxIn txInId _, txOut) = do , C.txOutConsumedByTxId = Nothing } DB.TxOutVariantAddress -> do - addrDetailId <- insertAddress + addrDetailId <- insertAddressUsingCache cache UpdateCache addrRaw vAddress void . DB.insertTxOut $ DB.VTxOutW (makeVTxOut addrDetailId txId) Nothing where addr = txOut ^. 
Core.addrTxOutL + cache = envCache syncEnv hasScript = maybe False Generic.hasCredScript (Generic.getPaymentCred addr) addrRaw = serialiseAddr addr @@ -298,16 +299,6 @@ insertTxOuts syncEnv trce blkId (TxIn txInId _, txOut) = do , V.addressStakeAddressId = Nothing -- No stake addresses in Shelley Genesis } - insertAddress :: - (MonadBaseControl IO m, MonadIO m) => - ReaderT SqlBackend m V.AddressId - insertAddress = do - mAddrId <- DB.queryAddressId addrRaw - case mAddrId of - Nothing -> DB.insertAddress vAddress - -- this address is already in the database, so we can just return the id to be linked to the txOut. - Just addrId -> pure addrId - -- Insert pools and delegations coming from Genesis. insertStaking :: (MonadBaseControl IO m, MonadIO m) => diff --git a/cardano-db-sync/src/Cardano/DbSync/Era/Universal/Insert/Tx.hs b/cardano-db-sync/src/Cardano/DbSync/Era/Universal/Insert/Tx.hs index 8674e1f02..4f12d03a9 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Era/Universal/Insert/Tx.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Era/Universal/Insert/Tx.hs @@ -20,8 +20,8 @@ import qualified Cardano.Db.Schema.Core.TxOut as C import qualified Cardano.Db.Schema.Variant.TxOut as V import Cardano.DbSync.Api import Cardano.DbSync.Api.Types (InsertOptions (..), SyncEnv (..)) -import Cardano.DbSync.Cache (queryTxIdWithCache, tryUpdateCacheTx) -import Cardano.DbSync.Cache.Types (CacheStatus (..)) +import Cardano.DbSync.Cache (insertAddressUsingCache, queryTxIdWithCache, tryUpdateCacheTx) +import Cardano.DbSync.Cache.Types (CacheAction (..), CacheStatus (..)) import qualified Cardano.DbSync.Era.Shelley.Generic as Generic import Cardano.DbSync.Era.Shelley.Generic.Metadata (TxMetadataValue (..), metadataValueToJsonNoSchema) import Cardano.DbSync.Era.Shelley.Generic.Tx.Types (TxIn (..)) @@ -251,7 +251,7 @@ insertTxOut tracer cache iopts (txId, txHash) (Generic.TxOut index addr value ma , V.addressPaymentCred = Generic.maybePaymentCred addr , V.addressStakeAddressId = mSaId } - 
addrId <- lift $ insertAddress addr vAddress + addrId <- lift $ insertAddressUsingCache cache UpdateCache (Ledger.serialiseAddr addr) vAddress pure $ DB.VTxOutW (mkTxOutVariant mSaId addrId mDatumId mScriptId) @@ -284,19 +284,6 @@ insertTxOut tracer cache iopts (txId, txHash) (Generic.TxOut index addr value ma , V.txOutStakeAddressId = mSaId } -insertAddress :: - (MonadBaseControl IO m, MonadIO m) => - Ledger.Addr StandardCrypto -> - V.Address -> - ReaderT SqlBackend m V.AddressId -insertAddress address vAddress = do - mAddrId <- DB.queryAddressId addrRaw - case mAddrId of - Nothing -> DB.insertAddress vAddress - Just addrId -> pure addrId - where - addrRaw = Ledger.serialiseAddr address - insertTxMetadata :: (MonadBaseControl IO m, MonadIO m) => Trace IO Text -> @@ -452,7 +439,7 @@ insertCollateralTxOut tracer cache iopts (txId, _txHash) (Generic.TxOut index ad , V.addressPaymentCred = Generic.maybePaymentCred addr , V.addressStakeAddressId = mSaId } - addrId <- lift $ insertAddress addr vAddress + addrId <- lift $ insertAddressUsingCache cache UpdateCache (Ledger.serialiseAddr addr) vAddress lift . DB.insertCollateralTxOut $ DB.VCollateralTxOutW