From dbb466baa9cc896c831e8c5eb8cb3a4a26177fd4 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 8 Feb 2024 18:07:32 -0500
Subject: [PATCH 01/15] Initia cut of a class-based peer-selector.
---
catchup/classBasedPeerSelector.go | 107 ++++++++++++++++++++++++++++++
catchup/peerSelector.go | 6 ++
2 files changed, 113 insertions(+)
create mode 100644 catchup/classBasedPeerSelector.go
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
new file mode 100644
index 0000000000..13644e2a9b
--- /dev/null
+++ b/catchup/classBasedPeerSelector.go
@@ -0,0 +1,107 @@
+// Copyright (C) 2019-2024 Algorand, Inc.
+// This file is part of go-algorand
+//
+// go-algorand is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// go-algorand is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with go-algorand. If not, see .
+
+package catchup
+
+import (
+ "github.com/algorand/go-algorand/network"
+ "github.com/algorand/go-deadlock"
+ "time"
+)
+
+// classBasedPeerSelector is a peerSelector that tracks and ranks classes of peers based on their response behavior.
+// It is used to select the most appropriate peers to download blocks from - this is most useful when catching up
+// and needing to figure out whether the blocks can be retrieved from relay nodes or require archive nodes.
+type classBasedPeerSelector struct {
+ mu deadlock.Mutex
+ peerSelectors []*wrappedPeerSelector
+}
+
+func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ peerSelectorSortNeeded := false
+ poolIdx, peerIdx := -1, -1
+ for _, wp := range c.peerSelectors {
+ // See if the peer is in the class, ranking it appropriately if so
+ poolIdx, peerIdx = wp.peerSelector.rankPeer(psp, rank)
+ if poolIdx < 0 || peerIdx < 0 {
+ // Peer not found in this class
+ continue
+ }
+ // Peer was in this class, if there was any kind of download issue, we increment the failure count
+ if rank >= peerRankNoBlockForRound {
+ wp.downloadFailures++
+ }
+
+ // If we have failed more than the tolerance factor, we re-sort the slice of peerSelectors
+ if wp.downloadFailures > wp.toleranceFactor {
+ peerSelectorSortNeeded = true
+ }
+ break
+ }
+
+ if peerSelectorSortNeeded {
+ // TODO: Implement sorting of peerSelectors
+ }
+
+ return poolIdx, peerIdx
+}
+
+func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ for _, wp := range c.peerSelectors {
+ rank = wp.peerSelector.peerDownloadDurationToRank(psp, blockDownloadDuration)
+ // If rank is peerRankInvalidDownload, we check the next class's peerSelector
+ if rank >= peerRankInvalidDownload {
+ continue
+ }
+ // Should be a legit ranking, we return it
+ return
+ }
+ // If we reached here, we have exhausted all classes without finding the peer
+ return peerRankInvalidDownload
+}
+
+func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ for _, wp := range c.peerSelectors {
+ psp, err = wp.peerSelector.getNextPeer()
+ if err != nil {
+ // TODO: No peers available in this class, move to the end of our list???
+ continue
+ }
+ return
+ }
+ // If we reached here, we have exhausted all classes and still have no peers
+ return
+}
+
+type wrappedPeerSelector struct {
+ peerSelector peerSelector
+ peerClass network.PeerOption
+ toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
+ downloadFailures int
+ // TODO: Add a lastUsed time.Duration to this struct
+}
+
+// Logic: We try a class's peerSelector Y times, and if it fails, we move to the next class.
+// If we get to the last peerSelector and are still not getting blocks, we return an error.
+// NOTE: if a peerselector has no pools, we do not use it??
diff --git a/catchup/peerSelector.go b/catchup/peerSelector.go
index 4ceda8d42d..a705c3c5db 100644
--- a/catchup/peerSelector.go
+++ b/catchup/peerSelector.go
@@ -109,6 +109,12 @@ type peerPool struct {
peers []peerPoolEntry
}
+type peerSelectorI interface {
+ rankPeer(psp *peerSelectorPeer, rank int) (int, int)
+ peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int)
+ getNextPeer() (psp *peerSelectorPeer, err error)
+}
+
// peerSelector is a helper struct used to select the next peer to try and connect to
// for various catchup purposes. Unlike the underlying network GetPeers(), it allows the
// client to provide feedback regarding the peer's performance, and to have the subsequent
From 81159828997b840a95f32a4cc834bb16a2b0f1f1 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Mon, 12 Feb 2024 16:13:28 -0500
Subject: [PATCH 02/15] Implement sorting for class-based peer selector based
on combo of tolerance factor violations, priority and lastCheckedTime.
---
catchup/classBasedPeerSelector.go | 69 ++++++++++++++++++++++++-------
1 file changed, 55 insertions(+), 14 deletions(-)
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 13644e2a9b..8d4762eb32 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -17,11 +17,16 @@
package catchup
import (
+ "errors"
"github.com/algorand/go-algorand/network"
"github.com/algorand/go-deadlock"
+ "sort"
"time"
)
+// The duration after which we reset the downloadFailures for a peerSelector
+const lastCheckedDuration = 10 * time.Minute
+
// classBasedPeerSelector is a peerSelector that tracks and ranks classes of peers based on their response behavior.
// It is used to select the most appropriate peers to download blocks from - this is most useful when catching up
// and needing to figure out whether the blocks can be retrieved from relay nodes or require archive nodes.
@@ -56,12 +61,47 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
}
if peerSelectorSortNeeded {
- // TODO: Implement sorting of peerSelectors
+ c.sortPeerSelectors()
}
return poolIdx, peerIdx
}
+// sortPeerSelectors sorts the peerSelectors by tolerance factor violation, and then by priority
+// It should only be called within a locked context
+func (c *classBasedPeerSelector) sortPeerSelectors() {
+ psUnderTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
+ psOverTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
+ for _, wp := range c.peerSelectors {
+ // If the peerSelector's download failures have not been reset in a while, we reset them
+ if time.Since(wp.lastCheckedTime) > lastCheckedDuration {
+ wp.downloadFailures = 0
+ // Reset again here, so we don't keep resetting the same peerSelector
+ wp.lastCheckedTime = time.Now()
+ }
+
+ if wp.downloadFailures <= wp.toleranceFactor {
+ psUnderTolerance = append(psUnderTolerance, wp)
+ } else {
+ psOverTolerance = append(psOverTolerance, wp)
+ }
+
+ }
+
+ // Sort the two groups by priority
+ sortByPriority := func(ps []*wrappedPeerSelector) {
+ sort.SliceStable(ps, func(i, j int) bool {
+ return ps[i].priority < ps[j].priority
+ })
+ }
+
+ sortByPriority(psUnderTolerance)
+ sortByPriority(psOverTolerance)
+
+ //Append the two groups back together
+ c.peerSelectors = append(psUnderTolerance, psOverTolerance...)
+}
+
func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -73,7 +113,7 @@ func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPee
continue
}
// Should be a legit ranking, we return it
- return
+ return rank
}
// If we reached here, we have exhausted all classes without finding the peer
return peerRankInvalidDownload
@@ -84,24 +124,25 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
psp, err = wp.peerSelector.getNextPeer()
+ wp.lastCheckedTime = time.Now()
if err != nil {
- // TODO: No peers available in this class, move to the end of our list???
+ if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
+ // We penalize this class the equivalent of one download failure (in case this is transient)
+ wp.downloadFailures++
+ }
continue
}
- return
+ return psp, nil
}
// If we reached here, we have exhausted all classes and still have no peers
- return
+ return nil, err
}
type wrappedPeerSelector struct {
- peerSelector peerSelector
- peerClass network.PeerOption
- toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
- downloadFailures int
- // TODO: Add a lastUsed time.Duration to this struct
+ peerSelector peerSelector // The unserlying peerSelector for this class
+ peerClass network.PeerOption // The class of peers the peerSelector is responsible for
+ toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
+ downloadFailures int // The number of times we have failed to download a block from this class's peerSelector since it was last reset
+ priority int // The original priority of the peerSelector, used for sorting
+ lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
-
-// Logic: We try a class's peerSelector Y times, and if it fails, we move to the next class.
-// If we get to the last peerSelector and are still not getting blocks, we return an error.
-// NOTE: if a peerselector has no pools, we do not use it??
From 0d44937d9e588028cfbd76e5004506b11fc9ccfc Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Tue, 13 Feb 2024 17:22:03 -0500
Subject: [PATCH 03/15] Sub in classBasedPeerSelector for original in catchup
and catchpoint services.
---
catchup/catchpointService.go | 49 +++++++---
catchup/classBasedPeerSelector.go | 28 ++++--
catchup/service.go | 157 ++++++++++++++++++++++++------
catchup/service_test.go | 154 ++++++++++++++++++++---------
4 files changed, 286 insertions(+), 102 deletions(-)
diff --git a/catchup/catchpointService.go b/catchup/catchpointService.go
index 2c4f6dfc41..8554ccaf1c 100644
--- a/catchup/catchpointService.go
+++ b/catchup/catchpointService.go
@@ -18,6 +18,7 @@ package catchup
import (
"context"
+ "errors"
"fmt"
"sync"
"time"
@@ -69,7 +70,7 @@ type CatchpointCatchupStats struct {
type CatchpointCatchupService struct {
// stats is the statistics object, updated async while downloading the ledger
stats CatchpointCatchupStats
- // statsMu synchronizes access to stats, as we could attempt to update it while querying for it's current state
+ // statsMu synchronizes access to stats, as we could attempt to update it while querying for its current state
statsMu deadlock.Mutex
node CatchpointCatchupNodeServices
// ctx is the node cancellation context, used when the node is being stopped.
@@ -98,7 +99,7 @@ type CatchpointCatchupService struct {
abortCtx context.Context
abortCtxFunc context.CancelFunc
// blocksDownloadPeerSelector is the peer selector used for downloading blocks.
- blocksDownloadPeerSelector *peerSelector
+ blocksDownloadPeerSelector peerSelectorI
}
// MakeResumedCatchpointCatchupService creates a catchpoint catchup service for a node that is already in catchpoint catchup mode
@@ -506,14 +507,14 @@ func lookbackForStateproofsSupport(topBlock *bookkeeping.Block) uint64 {
return uint64(topBlock.Round().SubSaturate(lowestStateProofRound))
}
-// processStageBlocksDownload is the fourth catchpoint catchup stage. It downloads all the reminder of the blocks, verifying each one of them against it's predecessor.
+// processStageBlocksDownload is the fourth catchpoint catchup stage. It downloads all the reminder of the blocks, verifying each one of them against its predecessor.
func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
topBlock, err := cs.ledgerAccessor.EnsureFirstBlock(cs.ctx)
if err != nil {
return cs.abort(fmt.Errorf("processStageBlocksDownload failed, unable to ensure first block : %v", err))
}
- // pick the lookback with the greater of
+ // pick the lookback with the greatest of
// either (MaxTxnLife+DeeperBlockHeaderHistory+CatchpointLookback) or MaxBalLookback
// Explanation:
// 1. catchpoint snapshots accounts at round X-CatchpointLookback
@@ -537,7 +538,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
}
cs.statsMu.Lock()
- cs.stats.TotalBlocks = uint64(lookback)
+ cs.stats.TotalBlocks = lookback
cs.stats.AcquiredBlocks = 0
cs.stats.VerifiedBlocks = 0
cs.statsMu.Unlock()
@@ -558,8 +559,9 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
blk = &ledgerBlock
cert = &ledgerCert
} else {
- switch err0.(type) {
- case ledgercore.ErrNoEntry:
+ var errNoEntry ledgercore.ErrNoEntry
+ switch {
+ case errors.As(err0, &errNoEntry):
// this is expected, ignore this one.
default:
cs.log.Warnf("processStageBlocksDownload encountered the following error when attempting to retrieve the block for round %d : %v", topBlock.Round()-basics.Round(blocksFetched), err0)
@@ -658,7 +660,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
func (cs *CatchpointCatchupService) fetchBlock(round basics.Round, retryCount uint64) (blk *bookkeeping.Block, cert *agreement.Certificate, downloadDuration time.Duration, psp *peerSelectorPeer, stop bool, err error) {
psp, err = cs.blocksDownloadPeerSelector.getNextPeer()
if err != nil {
- if err == errPeerSelectorNoPeerPoolsAvailable {
+ if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
cs.log.Infof("fetchBlock: unable to obtain a list of peers to retrieve the latest block from; will retry shortly.")
// this is a possible on startup, since the network package might have yet to retrieve the list of peers.
time.Sleep(noPeersAvailableSleepInterval)
@@ -718,7 +720,7 @@ func (cs *CatchpointCatchupService) processStageSwitch() (err error) {
// stopOrAbort is called when any of the stage processing function sees that cs.ctx has been canceled. It can be
// due to the end user attempting to abort the current catchpoint catchup operation or due to a node shutdown.
func (cs *CatchpointCatchupService) stopOrAbort() error {
- if cs.abortCtx.Err() == context.Canceled {
+ if errors.Is(cs.abortCtx.Err(), context.Canceled) {
return cs.abort(context.Canceled)
}
return nil
@@ -749,7 +751,7 @@ func (cs *CatchpointCatchupService) updateStage(newStage ledger.CatchpointCatchu
return nil
}
-// updateNodeCatchupMode requests the node to change it's operational mode from
+// updateNodeCatchupMode requests the node to change its operational mode from
// catchup mode to normal mode and vice versa.
func (cs *CatchpointCatchupService) updateNodeCatchupMode(catchupModeEnabled bool) {
newCtxCh := cs.node.SetCatchpointCatchupMode(catchupModeEnabled)
@@ -805,12 +807,27 @@ func (cs *CatchpointCatchupService) initDownloadPeerSelector() {
cs.blocksDownloadPeerSelector = cs.makeCatchpointPeerSelector()
}
-func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() *peerSelector {
- return makePeerSelector(
- cs.net,
- []peerClass{
- {initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays},
- })
+func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() peerSelectorI {
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelectorIRenameMeLater: makePeerSelector(cs.net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelectorIRenameMeLater: makePeerSelector(cs.net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ }
+
+ return makeClassBasedPeerSelector(wrappedPeerSelectors)
}
// checkLedgerDownload sends a HEAD request to the ledger endpoint of peers to validate the catchpoint's availability
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 8d4762eb32..2d393bd6de 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -35,6 +35,16 @@ type classBasedPeerSelector struct {
peerSelectors []*wrappedPeerSelector
}
+func makeClassBasedPeerSelector(peerSelectors []*wrappedPeerSelector) *classBasedPeerSelector {
+ // Sort the peerSelectors by priority
+ sort.SliceStable(peerSelectors, func(i, j int) bool {
+ return peerSelectors[i].priority < peerSelectors[j].priority
+ })
+ return &classBasedPeerSelector{
+ peerSelectors: peerSelectors,
+ }
+}
+
func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -43,7 +53,7 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
poolIdx, peerIdx := -1, -1
for _, wp := range c.peerSelectors {
// See if the peer is in the class, ranking it appropriately if so
- poolIdx, peerIdx = wp.peerSelector.rankPeer(psp, rank)
+ poolIdx, peerIdx = wp.peerSelectorIRenameMeLater.rankPeer(psp, rank)
if poolIdx < 0 || peerIdx < 0 {
// Peer not found in this class
continue
@@ -107,7 +117,7 @@ func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPee
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
- rank = wp.peerSelector.peerDownloadDurationToRank(psp, blockDownloadDuration)
+ rank = wp.peerSelectorIRenameMeLater.peerDownloadDurationToRank(psp, blockDownloadDuration)
// If rank is peerRankInvalidDownload, we check the next class's peerSelector
if rank >= peerRankInvalidDownload {
continue
@@ -123,7 +133,7 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
c.mu.Lock()
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
- psp, err = wp.peerSelector.getNextPeer()
+ psp, err = wp.peerSelectorIRenameMeLater.getNextPeer()
wp.lastCheckedTime = time.Now()
if err != nil {
if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
@@ -139,10 +149,10 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
}
type wrappedPeerSelector struct {
- peerSelector peerSelector // The unserlying peerSelector for this class
- peerClass network.PeerOption // The class of peers the peerSelector is responsible for
- toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
- downloadFailures int // The number of times we have failed to download a block from this class's peerSelector since it was last reset
- priority int // The original priority of the peerSelector, used for sorting
- lastCheckedTime time.Time // The last time we tried to use the peerSelector
+ peerSelectorIRenameMeLater peerSelectorI // The underlying peerSelector for this class
+ peerClass network.PeerOption // The class of peers the peerSelector is responsible for
+ toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
+ downloadFailures int // The number of times we have failed to download a block from this class's peerSelector since it was last reset
+ priority int // The original priority of the peerSelector, used for sorting
+ lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
diff --git a/catchup/service.go b/catchup/service.go
index 58fc3ae6b8..0c355d0e34 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -38,9 +38,6 @@ import (
"github.com/algorand/go-algorand/util/execpool"
)
-const catchupPeersForSync = 10
-const blockQueryPeerLimit = 10
-
// uncapParallelDownloadRate is a simple threshold to detect whether or not the node is caught up.
// If a block is downloaded in less than this duration, it's assumed that the node is not caught up
// and allow the block downloader to start N=parallelBlocks concurrent fetches.
@@ -266,7 +263,7 @@ const errNoBlockForRoundThreshold = 5
// - If we couldn't fetch the block (e.g. if there are no peers available, or we've reached the catchupRetryLimit)
// - If the block is already in the ledger (e.g. if agreement service has already written it)
// - If the retrieval of the previous block was unsuccessful
-func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCompleteChan chan struct{}, lookbackComplete chan struct{}, peerSelector *peerSelector) bool {
+func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCompleteChan chan struct{}, lookbackComplete chan struct{}, peerSelector peerSelectorI) bool {
// If sync-ing this round is not intended, don't fetch it
if dontSyncRound := s.GetDisableSyncRound(); dontSyncRound != 0 && r >= basics.Round(dontSyncRound) {
return false
@@ -751,9 +748,9 @@ func (s *Service) fetchRound(cert agreement.Certificate, verifier *agreement.Asy
peerErrors := map[network.Peer]int{}
blockHash := bookkeeping.BlockHash(cert.Proposal.BlockDigest) // semantic digest (i.e., hash of the block header), not byte-for-byte digest
- peerSelector := createPeerSelector(s.net, s.cfg, false)
+ ps := createPeerSelector(s.net, s.cfg, false)
for s.ledger.LastRound() < cert.Round {
- psp, getPeerErr := peerSelector.getNextPeer()
+ psp, getPeerErr := ps.getNextPeer()
if getPeerErr != nil {
s.log.Debugf("fetchRound: was unable to obtain a peer to retrieve the block from")
s.net.RequestConnectOutgoing(true, s.ctx.Done())
@@ -788,14 +785,14 @@ func (s *Service) fetchRound(cert agreement.Certificate, verifier *agreement.Asy
// - peer selector punishes one of the peers more than the other
// - the punoshed peer gets the block, and the less punished peer stucks.
// It this case reset the peer selector to let it re-learn priorities.
- peerSelector = createPeerSelector(s.net, s.cfg, false)
+ ps = createPeerSelector(s.net, s.cfg, false)
}
}
peerErrors[peer]++
}
// remote peer doesn't have the block, try another peer
logging.Base().Warnf("fetchRound could not acquire block, fetcher errored out: %v", err)
- peerSelector.rankPeer(psp, failureRank)
+ ps.rankPeer(psp, failureRank)
continue
}
@@ -805,7 +802,7 @@ func (s *Service) fetchRound(cert agreement.Certificate, verifier *agreement.Asy
}
// Otherwise, fetcher gave us the wrong block
logging.Base().Warnf("fetcher gave us bad/wrong block (for round %d): fetched hash %v; want hash %v", cert.Round, block.Hash(), blockHash)
- peerSelector.rankPeer(psp, peerRankInvalidDownload)
+ ps.rankPeer(psp, peerRankInvalidDownload)
// As a failsafe, if the cert we fetched is valid but for the wrong block, panic as loudly as possible
if cert.Round == fetchedCert.Round &&
@@ -866,38 +863,138 @@ func (s *Service) roundIsNotSupported(nextRound basics.Round) bool {
return true
}
-func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch bool) *peerSelector {
- var peerClasses []peerClass
+func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch bool) peerSelectorI {
+ var wrappedPeerSelectors []*wrappedPeerSelector
+ //TODO: Revisit this ordering
if pipelineFetch {
if cfg.NetAddress != "" && cfg.EnableGossipService { // Relay node
- peerClasses = []peerClass{
- {initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut},
- {initialRank: peerRankInitialSecondPriority, peerClass: network.PeersPhonebookArchivalNodes},
- {initialRank: peerRankInitialThirdPriority, peerClass: network.PeersPhonebookRelays},
- {initialRank: peerRankInitialFourthPriority, peerClass: network.PeersConnectedIn},
+ wrappedPeerSelectors = []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersConnectedIn,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
+ priority: peerRankInitialFourthPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
}
} else {
- peerClasses = []peerClass{
- {initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes},
- {initialRank: peerRankInitialSecondPriority, peerClass: network.PeersConnectedOut},
- {initialRank: peerRankInitialThirdPriority, peerClass: network.PeersPhonebookRelays},
+ wrappedPeerSelectors = []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
}
}
} else {
if cfg.NetAddress != "" && cfg.EnableGossipService { // Relay node
- peerClasses = []peerClass{
- {initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut},
- {initialRank: peerRankInitialSecondPriority, peerClass: network.PeersConnectedIn},
- {initialRank: peerRankInitialThirdPriority, peerClass: network.PeersPhonebookArchivalNodes},
- {initialRank: peerRankInitialFourthPriority, peerClass: network.PeersPhonebookRelays},
+ wrappedPeerSelectors = []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersConnectedIn,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialFourthPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
}
} else {
- peerClasses = []peerClass{
- {initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut},
- {initialRank: peerRankInitialSecondPriority, peerClass: network.PeersPhonebookArchivalNodes},
- {initialRank: peerRankInitialThirdPriority, peerClass: network.PeersPhonebookRelays},
+ wrappedPeerSelectors = []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelectorIRenameMeLater: makePeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
}
}
}
- return makePeerSelector(net, peerClasses)
+
+ return makeClassBasedPeerSelector(wrappedPeerSelectors)
}
diff --git a/catchup/service_test.go b/catchup/service_test.go
index 8deb692b0d..b96aca2933 100644
--- a/catchup/service_test.go
+++ b/catchup/service_test.go
@@ -967,16 +967,29 @@ func TestCreatePeerSelector(t *testing.T) {
s := MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps := createPeerSelector(s.net, s.cfg, true)
- require.Equal(t, 4, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
- require.Equal(t, peerRankInitialFourthPriority, ps.peerClasses[3].initialRank)
-
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[2].peerClass)
- require.Equal(t, network.PeersConnectedIn, ps.peerClasses[3].peerClass)
+ cps, ok := ps.(*classBasedPeerSelector)
+ require.True(t, ok)
+
+ require.Equal(t, 4, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+ require.Equal(t, peerRankInitialFourthPriority, cps.peerSelectors[3].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
+ require.Equal(t, network.PeersConnectedIn, cps.peerSelectors[3].peerClass)
+
+ require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
+ require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[3].toleranceFactor)
+
+ require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[3].lastCheckedTime.IsZero())
// cfg.NetAddress == ""; cfg.EnableGossipService = true; pipelineFetch = true
cfg.NetAddress = ""
@@ -984,14 +997,25 @@ func TestCreatePeerSelector(t *testing.T) {
s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps = createPeerSelector(s.net, s.cfg, true)
- require.Equal(t, 3, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
+ cps, ok = ps.(*classBasedPeerSelector)
+ require.True(t, ok)
+
+ require.Equal(t, 3, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
+
+ require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
+ require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[2].peerClass)
+ require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
// cfg.NetAddress != ""; cfg.EnableGossipService = false; pipelineFetch = true
cfg.NetAddress = "someAddress"
@@ -999,14 +1023,25 @@ func TestCreatePeerSelector(t *testing.T) {
s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps = createPeerSelector(s.net, s.cfg, true)
- require.Equal(t, 3, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
+ cps, ok = ps.(*classBasedPeerSelector)
+ require.True(t, ok)
+
+ require.Equal(t, 3, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
+
+ require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
+ require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[2].peerClass)
+ require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
// cfg.NetAddress != ""; cfg.EnableGossipService = true; pipelineFetch = false
cfg.NetAddress = "someAddress"
@@ -1014,16 +1049,19 @@ func TestCreatePeerSelector(t *testing.T) {
s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps = createPeerSelector(s.net, s.cfg, false)
- require.Equal(t, 4, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
- require.Equal(t, peerRankInitialFourthPriority, ps.peerClasses[3].initialRank)
+ cps, ok = ps.(*classBasedPeerSelector)
+ require.True(t, ok)
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersConnectedIn, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[2].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[3].peerClass)
+ require.Equal(t, 4, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+ require.Equal(t, peerRankInitialFourthPriority, cps.peerSelectors[3].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersConnectedIn, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[3].peerClass)
// cfg.NetAddress == ""; cfg.EnableGossipService = true; pipelineFetch = false
cfg.NetAddress = ""
@@ -1031,14 +1069,25 @@ func TestCreatePeerSelector(t *testing.T) {
s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps = createPeerSelector(s.net, s.cfg, false)
- require.Equal(t, 3, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
+ cps, ok = ps.(*classBasedPeerSelector)
+ require.True(t, ok)
+
+ require.Equal(t, 3, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[2].peerClass)
+
+ require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
+ require.Equal(t, 10, cps.peerSelectors[1].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[2].toleranceFactor)
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[2].peerClass)
+ require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
// cfg.NetAddress != ""; cfg.EnableGossipService = false; pipelineFetch = false
cfg.NetAddress = "someAddress"
@@ -1046,14 +1095,25 @@ func TestCreatePeerSelector(t *testing.T) {
s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
ps = createPeerSelector(s.net, s.cfg, false)
- require.Equal(t, 3, len(ps.peerClasses))
- require.Equal(t, peerRankInitialFirstPriority, ps.peerClasses[0].initialRank)
- require.Equal(t, peerRankInitialSecondPriority, ps.peerClasses[1].initialRank)
- require.Equal(t, peerRankInitialThirdPriority, ps.peerClasses[2].initialRank)
+ cps, ok = ps.(*classBasedPeerSelector)
+ require.True(t, ok)
+
+ require.Equal(t, 3, len(cps.peerSelectors))
+ require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
+ require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
+ require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
+
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[2].peerClass)
+
+ require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
+ require.Equal(t, 10, cps.peerSelectors[1].toleranceFactor)
+ require.Equal(t, 3, cps.peerSelectors[2].toleranceFactor)
- require.Equal(t, network.PeersConnectedOut, ps.peerClasses[0].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, ps.peerClasses[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, ps.peerClasses[2].peerClass)
+ require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
+ require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
}
func TestServiceStartStop(t *testing.T) {
From d99f0db5be4c68ad1761a4661fd17968f3882db3 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Wed, 14 Feb 2024 09:04:47 -0500
Subject: [PATCH 04/15] Naming revamp.
---
catchup/catchpointService.go | 58 +++++++++++++++---------------
catchup/classBasedPeerSelector.go | 28 +++++++--------
catchup/peerSelector.go | 30 ++++++++--------
catchup/peerSelector_test.go | 22 ++++++------
catchup/service.go | 60 ++++++++++++++++---------------
5 files changed, 101 insertions(+), 97 deletions(-)
diff --git a/catchup/catchpointService.go b/catchup/catchpointService.go
index 8554ccaf1c..2c64f408aa 100644
--- a/catchup/catchpointService.go
+++ b/catchup/catchpointService.go
@@ -99,7 +99,7 @@ type CatchpointCatchupService struct {
abortCtx context.Context
abortCtxFunc context.CancelFunc
// blocksDownloadPeerSelector is the peer selector used for downloading blocks.
- blocksDownloadPeerSelector peerSelectorI
+ blocksDownloadPeerSelector peerSelector
}
// MakeResumedCatchpointCatchupService creates a catchpoint catchup service for a node that is already in catchpoint catchup mode
@@ -281,7 +281,7 @@ func (cs *CatchpointCatchupService) processStageInactive() (err error) {
}
// processStageLedgerDownload is the second catchpoint catchup stage. It downloads the ledger.
-func (cs *CatchpointCatchupService) processStageLedgerDownload() (err error) {
+func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
cs.statsMu.Lock()
label := cs.stats.CatchpointLabel
cs.statsMu.Unlock()
@@ -292,40 +292,40 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() (err error) {
}
// download balances file.
- peerSelector := cs.makeCatchpointPeerSelector()
- ledgerFetcher := makeLedgerFetcher(cs.net, cs.ledgerAccessor, cs.log, cs, cs.config)
+ ps := cs.makeCatchpointPeerSelector()
+ lf := makeLedgerFetcher(cs.net, cs.ledgerAccessor, cs.log, cs, cs.config)
attemptsCount := 0
for {
attemptsCount++
- err = cs.ledgerAccessor.ResetStagingBalances(cs.ctx, true)
- if err != nil {
+ err1 := cs.ledgerAccessor.ResetStagingBalances(cs.ctx, true)
+ if err1 != nil {
if cs.ctx.Err() != nil {
return cs.stopOrAbort()
}
- return cs.abort(fmt.Errorf("processStageLedgerDownload failed to reset staging balances : %v", err))
+ return cs.abort(fmt.Errorf("processStageLedgerDownload failed to reset staging balances : %v", err1))
}
- psp, err := peerSelector.getNextPeer()
- if err != nil {
- err = fmt.Errorf("processStageLedgerDownload: catchpoint catchup was unable to obtain a list of peers to retrieve the catchpoint file from")
- return cs.abort(err)
+ psp, err1 := ps.getNextPeer()
+ if err1 != nil {
+ err1 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup was unable to obtain a list of peers to retrieve the catchpoint file from")
+ return cs.abort(err1)
}
peer := psp.Peer
start := time.Now()
- err = ledgerFetcher.downloadLedger(cs.ctx, peer, round)
- if err == nil {
+ err1 = lf.downloadLedger(cs.ctx, peer, round)
+ if err1 == nil {
cs.log.Infof("ledger downloaded in %d seconds", time.Since(start)/time.Second)
start = time.Now()
- err = cs.ledgerAccessor.BuildMerkleTrie(cs.ctx, cs.updateVerifiedCounts)
- if err == nil {
+ err1 = cs.ledgerAccessor.BuildMerkleTrie(cs.ctx, cs.updateVerifiedCounts)
+ if err1 == nil {
cs.log.Infof("built merkle trie in %d seconds", time.Since(start)/time.Second)
break
}
// failed to build the merkle trie for the above catchpoint file.
- peerSelector.rankPeer(psp, peerRankInvalidDownload)
+ ps.rankPeer(psp, peerRankInvalidDownload)
} else {
- peerSelector.rankPeer(psp, peerRankDownloadFailed)
+ ps.rankPeer(psp, peerRankDownloadFailed)
}
// instead of testing for err == cs.ctx.Err() , we'll check on the context itself.
@@ -336,15 +336,15 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() (err error) {
}
if attemptsCount >= cs.config.CatchupLedgerDownloadRetryAttempts {
- err = fmt.Errorf("processStageLedgerDownload: catchpoint catchup exceeded number of attempts to retrieve ledger")
- return cs.abort(err)
+ err1 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup exceeded number of attempts to retrieve ledger")
+ return cs.abort(err1)
}
- cs.log.Warnf("unable to download ledger : %v", err)
+ cs.log.Warnf("unable to download ledger : %v", err1)
}
- err = cs.updateStage(ledger.CatchpointCatchupStateLatestBlockDownload)
- if err != nil {
- return cs.abort(fmt.Errorf("processStageLedgerDownload failed to update stage to CatchpointCatchupStateLatestBlockDownload : %v", err))
+ err0 = cs.updateStage(ledger.CatchpointCatchupStateLatestBlockDownload)
+ if err0 != nil {
+ return cs.abort(fmt.Errorf("processStageLedgerDownload failed to update stage to CatchpointCatchupStateLatestBlockDownload : %v", err0))
}
return nil
}
@@ -532,7 +532,7 @@ func (cs *CatchpointCatchupService) processStageBlocksDownload() (err error) {
}
// in case the effective lookback is going before our rounds count, trim it there.
- // ( a catchpoint is generated starting round MaxBalLookback, and this is a possible in any round in the range of MaxBalLookback..MaxTxnLife)
+ // ( a catchpoint is generated starting round MaxBalLookback, and this is a possible in any round in the range of MaxBalLookback...MaxTxnLife)
if lookback >= uint64(topBlock.Round()) {
lookback = uint64(topBlock.Round() - 1)
}
@@ -807,11 +807,11 @@ func (cs *CatchpointCatchupService) initDownloadPeerSelector() {
cs.blocksDownloadPeerSelector = cs.makeCatchpointPeerSelector()
}
-func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() peerSelectorI {
+func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() peerSelector {
wrappedPeerSelectors := []*wrappedPeerSelector{
{
peerClass: network.PeersPhonebookRelays,
- peerSelectorIRenameMeLater: makePeerSelector(cs.net,
+ peerSelector: makeRankPooledPeerSelector(cs.net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -819,7 +819,7 @@ func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() peerSelectorI {
},
{
peerClass: network.PeersPhonebookArchivalNodes,
- peerSelectorIRenameMeLater: makePeerSelector(cs.net,
+ peerSelector: makeRankPooledPeerSelector(cs.net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
priority: peerRankInitialSecondPriority,
toleranceFactor: 10,
@@ -838,10 +838,10 @@ func (cs *CatchpointCatchupService) checkLedgerDownload() error {
if err != nil {
return fmt.Errorf("failed to parse catchpoint label : %v", err)
}
- peerSelector := cs.makeCatchpointPeerSelector()
+ ps := cs.makeCatchpointPeerSelector()
ledgerFetcher := makeLedgerFetcher(cs.net, cs.ledgerAccessor, cs.log, cs, cs.config)
for i := 0; i < cs.config.CatchupLedgerDownloadRetryAttempts; i++ {
- psp, peerError := peerSelector.getNextPeer()
+ psp, peerError := ps.getNextPeer()
if peerError != nil {
return err
}
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 2d393bd6de..01f6aab75e 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -24,10 +24,10 @@ import (
"time"
)
-// The duration after which we reset the downloadFailures for a peerSelector
+// The duration after which we reset the downloadFailures for a rankPooledPeerSelector
const lastCheckedDuration = 10 * time.Minute
-// classBasedPeerSelector is a peerSelector that tracks and ranks classes of peers based on their response behavior.
+// classBasedPeerSelector is a rankPooledPeerSelector that tracks and ranks classes of peers based on their response behavior.
// It is used to select the most appropriate peers to download blocks from - this is most useful when catching up
// and needing to figure out whether the blocks can be retrieved from relay nodes or require archive nodes.
type classBasedPeerSelector struct {
@@ -53,7 +53,7 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
poolIdx, peerIdx := -1, -1
for _, wp := range c.peerSelectors {
// See if the peer is in the class, ranking it appropriately if so
- poolIdx, peerIdx = wp.peerSelectorIRenameMeLater.rankPeer(psp, rank)
+ poolIdx, peerIdx = wp.peerSelector.rankPeer(psp, rank)
if poolIdx < 0 || peerIdx < 0 {
// Peer not found in this class
continue
@@ -83,10 +83,10 @@ func (c *classBasedPeerSelector) sortPeerSelectors() {
psUnderTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
psOverTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
for _, wp := range c.peerSelectors {
- // If the peerSelector's download failures have not been reset in a while, we reset them
+ // If the rankPooledPeerSelector's download failures have not been reset in a while, we reset them
if time.Since(wp.lastCheckedTime) > lastCheckedDuration {
wp.downloadFailures = 0
- // Reset again here, so we don't keep resetting the same peerSelector
+ // Reset again here, so we don't keep resetting the same rankPooledPeerSelector
wp.lastCheckedTime = time.Now()
}
@@ -117,8 +117,8 @@ func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPee
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
- rank = wp.peerSelectorIRenameMeLater.peerDownloadDurationToRank(psp, blockDownloadDuration)
- // If rank is peerRankInvalidDownload, we check the next class's peerSelector
+ rank = wp.peerSelector.peerDownloadDurationToRank(psp, blockDownloadDuration)
+ // If rank is peerRankInvalidDownload, we check the next class's rankPooledPeerSelector
if rank >= peerRankInvalidDownload {
continue
}
@@ -133,7 +133,7 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
c.mu.Lock()
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
- psp, err = wp.peerSelectorIRenameMeLater.getNextPeer()
+ psp, err = wp.peerSelector.getNextPeer()
wp.lastCheckedTime = time.Now()
if err != nil {
if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
@@ -149,10 +149,10 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
}
type wrappedPeerSelector struct {
- peerSelectorIRenameMeLater peerSelectorI // The underlying peerSelector for this class
- peerClass network.PeerOption // The class of peers the peerSelector is responsible for
- toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's peerSelector
- downloadFailures int // The number of times we have failed to download a block from this class's peerSelector since it was last reset
- priority int // The original priority of the peerSelector, used for sorting
- lastCheckedTime time.Time // The last time we tried to use the peerSelector
+ peerSelector peerSelector // The underlying rankPooledPeerSelector for this class
+ peerClass network.PeerOption // The class of peers the rankPooledPeerSelector is responsible for
+ toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's rankPooledPeerSelector
+ downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
+ priority int // The original priority of the rankPooledPeerSelector, used for sorting
+ lastCheckedTime time.Time // The last time we tried to use the rankPooledPeerSelector
}
diff --git a/catchup/peerSelector.go b/catchup/peerSelector.go
index a705c3c5db..1485295581 100644
--- a/catchup/peerSelector.go
+++ b/catchup/peerSelector.go
@@ -88,7 +88,7 @@ type peerClass struct {
peerClass network.PeerOption
}
-// the peersRetriever is a subset of the network.GossipNode used to ensure that we can create an instance of the peerSelector
+// the peersRetriever is a subset of the network.GossipNode used to ensure that we can create an instance of the rankPooledPeerSelector
// for testing purposes, providing just the above function.
type peersRetriever interface {
// Get a list of Peers we could potentially send a direct message to.
@@ -109,20 +109,20 @@ type peerPool struct {
peers []peerPoolEntry
}
-type peerSelectorI interface {
+type peerSelector interface {
rankPeer(psp *peerSelectorPeer, rank int) (int, int)
peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int)
getNextPeer() (psp *peerSelectorPeer, err error)
}
-// peerSelector is a helper struct used to select the next peer to try and connect to
+// rankPooledPeerSelector is a helper struct used to select the next peer to try and connect to
// for various catchup purposes. Unlike the underlying network GetPeers(), it allows the
// client to provide feedback regarding the peer's performance, and to have the subsequent
// query(s) take advantage of that intel.
-type peerSelector struct {
+type rankPooledPeerSelector struct {
mu deadlock.Mutex
net peersRetriever
- // peerClasses is the list of peer classes we want to have in the peerSelector.
+ // peerClasses is the list of peer classes we want to have in the rankPooledPeerSelector.
peerClasses []peerClass
// pools is the list of peer pools, each pool contains a list of peers with the same rank.
pools []peerPool
@@ -290,9 +290,9 @@ func (hs *historicStats) push(value int, counter uint64, class peerClass) (avera
return bounded
}
-// makePeerSelector creates a peerSelector, given a peersRetriever and peerClass array.
-func makePeerSelector(net peersRetriever, initialPeersClasses []peerClass) *peerSelector {
- selector := &peerSelector{
+// makeRankPooledPeerSelector creates a rankPooledPeerSelector, given a peersRetriever and peerClass array.
+func makeRankPooledPeerSelector(net peersRetriever, initialPeersClasses []peerClass) *rankPooledPeerSelector {
+ selector := &rankPooledPeerSelector{
net: net,
peerClasses: initialPeersClasses,
}
@@ -302,7 +302,7 @@ func makePeerSelector(net peersRetriever, initialPeersClasses []peerClass) *peer
// getNextPeer returns the next peer. It randomally selects a peer from a pool that has
// the lowest rank value. Given that the peers are grouped by their ranks, allow us to
// prioritize peers based on their class and/or performance.
-func (ps *peerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
+func (ps *rankPooledPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.refreshAvailablePeers()
@@ -323,7 +323,7 @@ func (ps *peerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
// rankPeer ranks a given peer.
// return the old value and the new updated value.
// updated value could be different from the input rank.
-func (ps *peerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
+func (ps *rankPooledPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
if psp == nil {
return -1, -1
}
@@ -390,7 +390,7 @@ func (ps *peerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
}
// peerDownloadDurationToRank calculates the rank for a peer given a peer and the block download time.
-func (ps *peerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+func (ps *rankPooledPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
ps.mu.Lock()
defer ps.mu.Unlock()
poolIdx, peerIdx := ps.findPeer(psp)
@@ -415,7 +415,7 @@ func (ps *peerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockD
// addToPool adds a given peer to the correct group. If no group exists for that peer's rank,
// a new group is created.
// The method return true if a new group was created ( suggesting that the pools list would need to be re-ordered ), or false otherwise.
-func (ps *peerSelector) addToPool(peer network.Peer, rank int, class peerClass, peerHistory *historicStats) bool {
+func (ps *rankPooledPeerSelector) addToPool(peer network.Peer, rank int, class peerClass, peerHistory *historicStats) bool {
// see if we already have a list with that rank:
for i, pool := range ps.pools {
if pool.rank == rank {
@@ -429,7 +429,7 @@ func (ps *peerSelector) addToPool(peer network.Peer, rank int, class peerClass,
}
// sort the pools array in an ascending order according to the rank of each pool.
-func (ps *peerSelector) sort() {
+func (ps *rankPooledPeerSelector) sort() {
sort.SliceStable(ps.pools, func(i, j int) bool {
return ps.pools[i].rank < ps.pools[j].rank
})
@@ -449,7 +449,7 @@ func peerAddress(peer network.Peer) string {
// refreshAvailablePeers reload the available peers from the network package, add new peers along with their
// corresponding initial rank, and deletes peers that have been dropped by the network package.
-func (ps *peerSelector) refreshAvailablePeers() {
+func (ps *rankPooledPeerSelector) refreshAvailablePeers() {
existingPeers := make(map[network.PeerOption]map[string]bool)
for _, pool := range ps.pools {
for _, localPeer := range pool.peers {
@@ -507,7 +507,7 @@ func (ps *peerSelector) refreshAvailablePeers() {
// findPeer look into the peer pool and find the given peer.
// The method returns the pool and peer indices if a peer was found, or (-1, -1) otherwise.
-func (ps *peerSelector) findPeer(psp *peerSelectorPeer) (poolIdx, peerIdx int) {
+func (ps *rankPooledPeerSelector) findPeer(psp *peerSelectorPeer) (poolIdx, peerIdx int) {
peerAddr := peerAddress(psp.Peer)
if peerAddr == "" {
return -1, -1
diff --git a/catchup/peerSelector_test.go b/catchup/peerSelector_test.go
index aa8d348d43..7aa373d280 100644
--- a/catchup/peerSelector_test.go
+++ b/catchup/peerSelector_test.go
@@ -131,7 +131,7 @@ func TestPeerSelector_RankPeer(t *testing.T) {
peers := []network.Peer{&mockHTTPPeer{address: "12345"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) []network.Peer {
return peers
}), []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}},
@@ -191,7 +191,7 @@ func TestPeerSelector_PeerDownloadRanking(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "1234"}, &mockHTTPPeer{address: "5678"}}
peers2 := []network.Peer{&mockHTTPPeer{address: "abcd"}, &mockHTTPPeer{address: "efgh"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -240,7 +240,7 @@ func TestPeerSelector_FindMissingPeer(t *testing.T) {
partitiontest.PartitionTest(t)
t.Parallel()
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) []network.Peer {
return []network.Peer{}
}), []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}},
@@ -258,7 +258,7 @@ func TestPeerSelector_HistoricData(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}, &mockHTTPPeer{address: "a2"}, &mockHTTPPeer{address: "a3"}}
peers2 := []network.Peer{&mockHTTPPeer{address: "b1"}, &mockHTTPPeer{address: "b2"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -332,7 +332,7 @@ func TestPeerSelector_PeersDownloadFailed(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}, &mockHTTPPeer{address: "a2"}, &mockHTTPPeer{address: "a3"}}
peers2 := []network.Peer{&mockHTTPPeer{address: "b1"}, &mockHTTPPeer{address: "b2"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -408,7 +408,7 @@ func TestPeerSelector_Penalty(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}, &mockHTTPPeer{address: "a2"}, &mockHTTPPeer{address: "a3"}}
peers2 := []network.Peer{&mockHTTPPeer{address: "b1"}, &mockHTTPPeer{address: "b2"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -469,7 +469,7 @@ func TestPeerSelector_PeerDownloadDurationToRank(t *testing.T) {
peers3 := []network.Peer{&mockHTTPPeer{address: "c1"}, &mockHTTPPeer{address: "c2"}}
peers4 := []network.Peer{&mockHTTPPeer{address: "d1"}, &mockHTTPPeer{address: "b2"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookRelays {
@@ -574,7 +574,7 @@ func TestPeerSelector_ClassUpperBound(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}, &mockHTTPPeer{address: "a2"}}
pClass := peerClass{initialRank: peerRankInitialSecondPriority, peerClass: network.PeersPhonebookArchivalNodes}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -609,7 +609,7 @@ func TestPeerSelector_ClassLowerBound(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}, &mockHTTPPeer{address: "a2"}}
pClass := peerClass{initialRank: peerRankInitialSecondPriority, peerClass: network.PeersPhonebookArchivalNodes}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -639,7 +639,7 @@ func TestPeerSelector_EvictionAndUpgrade(t *testing.T) {
peers1 := []network.Peer{&mockHTTPPeer{address: "a1"}}
peers2 := []network.Peer{&mockHTTPPeer{address: "a1"}}
- peerSelector := makePeerSelector(
+ peerSelector := makeRankPooledPeerSelector(
makePeersRetrieverStub(func(options ...network.PeerOption) (peers []network.Peer) {
for _, opt := range options {
if opt == network.PeersPhonebookArchivalNodes {
@@ -677,7 +677,7 @@ func TestPeerSelector_RefreshAvailablePeers(t *testing.T) {
// check new peers added to the pool
p1 := mockHTTPPeer{address: "p1"}
p2 := mockHTTPPeer{address: "p2"}
- ps := peerSelector{
+ ps := rankPooledPeerSelector{
peerClasses: []peerClass{
{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut},
{initialRank: peerRankInitialSecondPriority, peerClass: network.PeersPhonebookArchivalNodes},
diff --git a/catchup/service.go b/catchup/service.go
index 0c355d0e34..eb9fc2a57a 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -38,7 +38,7 @@ import (
"github.com/algorand/go-algorand/util/execpool"
)
-// uncapParallelDownloadRate is a simple threshold to detect whether or not the node is caught up.
+// uncapParallelDownloadRate is a simple threshold to detect whether the node is caught up.
// If a block is downloaded in less than this duration, it's assumed that the node is not caught up
// and allow the block downloader to start N=parallelBlocks concurrent fetches.
const uncapParallelDownloadRate = time.Second
@@ -263,7 +263,7 @@ const errNoBlockForRoundThreshold = 5
// - If we couldn't fetch the block (e.g. if there are no peers available, or we've reached the catchupRetryLimit)
// - If the block is already in the ledger (e.g. if agreement service has already written it)
// - If the retrieval of the previous block was unsuccessful
-func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCompleteChan chan struct{}, lookbackComplete chan struct{}, peerSelector peerSelectorI) bool {
+func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCompleteChan chan struct{}, lookbackComplete chan struct{}, peerSelector peerSelector) bool {
// If sync-ing this round is not intended, don't fetch it
if dontSyncRound := s.GetDisableSyncRound(); dontSyncRound != 0 && r >= basics.Round(dontSyncRound) {
return false
@@ -315,7 +315,7 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
block, cert, blockDownloadDuration, err := s.innerFetch(ctx, r, peer)
if err != nil {
- if err == errLedgerAlreadyHasBlock {
+ if errors.Is(err, errLedgerAlreadyHasBlock) {
// ledger already has the block, no need to request this block.
// only the agreement could have added this block into the ledger, catchup is complete
s.log.Infof("fetchAndWrite(%d): the block is already in the ledger. The catchup is complete", r)
@@ -326,7 +326,7 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
if errors.As(err, &nbfe) {
failureRank = peerRankNoBlockForRound
// remote peer doesn't have the block, try another peer
- // quit if the the same peer peer encountered errNoBlockForRound more than errNoBlockForRoundThreshold times
+ // quit if the same peer encountered errNoBlockForRound more than errNoBlockForRoundThreshold times
if s.followLatest {
// back off between retries to allow time for the next block to appear;
// this will provide 50s (catchupRetryLimit * followLatestBackoff) of
@@ -424,7 +424,8 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
// if the context expired, just exit.
return false
}
- if errNSBE, ok := err.(ledgercore.ErrNonSequentialBlockEval); ok && errNSBE.EvaluatorRound <= errNSBE.LatestRound {
+ var errNSBE ledgercore.ErrNonSequentialBlockEval
+ if errors.As(err, &errNSBE) && errNSBE.EvaluatorRound <= errNSBE.LatestRound {
// the block was added to the ledger from elsewhere after fetching it here
// only the agreement could have added this block into the ledger, catchup is complete
s.log.Infof("fetchAndWrite(%d): after fetching the block, it is already in the ledger. The catchup is complete", r)
@@ -439,18 +440,21 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
}
if err != nil {
- switch err.(type) {
- case ledgercore.ErrNonSequentialBlockEval:
+ var errNonSequentialBlockEval ledgercore.ErrNonSequentialBlockEval
+ var blockInLedgerError ledgercore.BlockInLedgerError
+ var err1 protocol.Error
+ switch {
+ case errors.As(err, &errNonSequentialBlockEval):
s.log.Infof("fetchAndWrite(%d): no need to re-evaluate historical block", r)
return true
- case ledgercore.BlockInLedgerError:
+ case errors.As(err, &blockInLedgerError):
// the block was added to the ledger from elsewhere after fetching it here
// only the agreement could have added this block into the ledger, catchup is complete
s.log.Infof("fetchAndWrite(%d): after fetching the block, it is already in the ledger. The catchup is complete", r)
return false
- case protocol.Error:
+ case errors.As(err, &err1):
if !s.protocolErrorLogged {
- logging.Base().Errorf("fetchAndWrite(%v): unrecoverable protocol error detected: %v", r, err)
+ logging.Base().Errorf("fetchAndWrite(%v): unrecoverable protocol err1 detected: %v", r, err)
s.protocolErrorLogged = true
}
default:
@@ -488,8 +492,8 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
}
}()
- peerSelector := createPeerSelector(s.net, s.cfg, true)
- if _, err := peerSelector.getNextPeer(); err == errPeerSelectorNoPeerPoolsAvailable {
+ ps := createPeerSelector(s.net, s.cfg, true)
+ if _, err := ps.getNextPeer(); errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
s.log.Debugf("pipelinedFetch: was unable to obtain a peer to retrieve the block from")
return
}
@@ -524,7 +528,7 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
go func(r basics.Round) {
prev := s.ledger.WaitMem(r - 1)
seed := s.ledger.WaitMem(r.SubSaturate(basics.Round(seedLookback)))
- done <- s.fetchAndWrite(ctx, r, prev, seed, peerSelector)
+ done <- s.fetchAndWrite(ctx, r, prev, seed, ps)
wg.Done()
}(nextRound)
@@ -863,7 +867,7 @@ func (s *Service) roundIsNotSupported(nextRound basics.Round) bool {
return true
}
-func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch bool) peerSelectorI {
+func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch bool) peerSelector {
var wrappedPeerSelectors []*wrappedPeerSelector
//TODO: Revisit this ordering
if pipelineFetch {
@@ -871,7 +875,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
wrappedPeerSelectors = []*wrappedPeerSelector{
{
peerClass: network.PeersConnectedOut,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -879,7 +883,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookRelays,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
@@ -887,7 +891,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookArchivalNodes,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
priority: peerRankInitialThirdPriority,
toleranceFactor: 10,
@@ -895,7 +899,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersConnectedIn,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
priority: peerRankInitialFourthPriority,
toleranceFactor: 3,
@@ -906,7 +910,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
wrappedPeerSelectors = []*wrappedPeerSelector{
{
peerClass: network.PeersConnectedOut,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -914,7 +918,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookRelays,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
@@ -922,7 +926,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookArchivalNodes,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
priority: peerRankInitialThirdPriority,
toleranceFactor: 10,
@@ -935,7 +939,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
wrappedPeerSelectors = []*wrappedPeerSelector{
{
peerClass: network.PeersConnectedOut,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -943,7 +947,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersConnectedIn,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
@@ -951,7 +955,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookArchivalNodes,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
priority: peerRankInitialThirdPriority,
toleranceFactor: 10,
@@ -959,7 +963,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookRelays,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
priority: peerRankInitialFourthPriority,
toleranceFactor: 3,
@@ -970,7 +974,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
wrappedPeerSelectors = []*wrappedPeerSelector{
{
peerClass: network.PeersConnectedOut,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -978,7 +982,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookArchivalNodes,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
priority: peerRankInitialSecondPriority,
toleranceFactor: 10,
@@ -986,7 +990,7 @@ func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch
},
{
peerClass: network.PeersPhonebookRelays,
- peerSelectorIRenameMeLater: makePeerSelector(net,
+ peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
From ccdb22d1b82d53dcd743902e7fde324878cac4f4 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 12:35:55 -0500
Subject: [PATCH 05/15] Initial cut of unit tests for classBasedPeerSelector.
---
catchup/classBasedPeerSelector.go | 11 +-
catchup/classBasedPeerSelector_test.go | 322 +++++++++++++++++++++++++
2 files changed, 328 insertions(+), 5 deletions(-)
create mode 100644 catchup/classBasedPeerSelector_test.go
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 01f6aab75e..9fecc23e72 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -50,11 +50,11 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
defer c.mu.Unlock()
peerSelectorSortNeeded := false
- poolIdx, peerIdx := -1, -1
+ oldRank, newRank := -1, -1
for _, wp := range c.peerSelectors {
// See if the peer is in the class, ranking it appropriately if so
- poolIdx, peerIdx = wp.peerSelector.rankPeer(psp, rank)
- if poolIdx < 0 || peerIdx < 0 {
+ oldRank, newRank = wp.peerSelector.rankPeer(psp, rank)
+ if oldRank < 0 || newRank < 0 {
// Peer not found in this class
continue
}
@@ -74,7 +74,7 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
c.sortPeerSelectors()
}
- return poolIdx, peerIdx
+ return oldRank, newRank
}
// sortPeerSelectors sorts the peerSelectors by tolerance factor violation, and then by priority
@@ -136,6 +136,7 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
psp, err = wp.peerSelector.getNextPeer()
wp.lastCheckedTime = time.Now()
if err != nil {
+ // This is mostly just future-proofing, as we don't expect any other errors from getNextPeer
if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
// We penalize this class the equivalent of one download failure (in case this is transient)
wp.downloadFailures++
@@ -152,7 +153,7 @@ type wrappedPeerSelector struct {
peerSelector peerSelector // The underlying rankPooledPeerSelector for this class
peerClass network.PeerOption // The class of peers the rankPooledPeerSelector is responsible for
toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's rankPooledPeerSelector
- downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
priority int // The original priority of the rankPooledPeerSelector, used for sorting
+ downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
lastCheckedTime time.Time // The last time we tried to use the rankPooledPeerSelector
}
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
new file mode 100644
index 0000000000..0c86bddd56
--- /dev/null
+++ b/catchup/classBasedPeerSelector_test.go
@@ -0,0 +1,322 @@
+package catchup
+
+import (
+ "github.com/algorand/go-algorand/network"
+ "github.com/algorand/go-algorand/test/partitiontest"
+ "github.com/stretchr/testify/require"
+ "testing"
+ "time"
+)
+
+// Use to mock the wrapped peer selectors where warranted
+type mockPeerSelector struct {
+ mockRankPeer func(psp *peerSelectorPeer, rank int) (int, int)
+ mockPeerDownloadDurationToRank func(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int)
+ mockGetNextPeer func() (psp *peerSelectorPeer, err error)
+}
+
+func (m mockPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int, int) {
+ return m.mockRankPeer(psp, rank)
+}
+
+func (m mockPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+ return m.mockPeerDownloadDurationToRank(psp, blockDownloadDuration)
+}
+
+func (m mockPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
+ return m.mockGetNextPeer()
+}
+
+func TestClassBasedPeerSelector_makeClassBasedPeerSelector(t *testing.T) {
+ partitiontest.PartitionTest(t)
+ t.Parallel()
+
+ // Intentionally put the selectors in non-priority order
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: mockPeerSelector{},
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: mockPeerSelector{},
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: mockPeerSelector{},
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ }
+
+ cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
+
+ // The selectors should be sorted by priority
+ require.Equal(t, 3, len(cps.peerSelectors))
+ require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[0].peerClass)
+ require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[1].peerClass)
+ require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
+}
+
+func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
+ partitiontest.PartitionTest(t)
+ t.Parallel()
+
+ mockPeer := &peerSelectorPeer{}
+
+ // Create a class based peer selector initially with the first wrapped peer selector not having the peer,
+ // second one having it, and a third one not having it
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: mockPeerSelector{
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ return -1, -1
+ },
+ },
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: mockPeerSelector{
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ if psp == mockPeer {
+ return 10, rank
+ }
+ return -1, -1
+ },
+ },
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: mockPeerSelector{
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ return -1, -1
+ },
+ },
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ }
+ cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
+
+ // Peer is found in second selector, rank is within range for a block found
+ oldRank, newRank := cps.rankPeer(mockPeer, 50)
+
+ require.Equal(t, 10, oldRank)
+ require.Equal(t, 50, newRank)
+ require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
+
+ // Peer is found in second selector, rank is >= peerRankNoBlockForRound
+ oldRank, newRank = cps.rankPeer(mockPeer, peerRankNoBlockForRound)
+
+ require.Equal(t, 10, oldRank)
+ require.Equal(t, peerRankNoBlockForRound, newRank)
+ require.Equal(t, 1, cps.peerSelectors[1].downloadFailures)
+
+ // We fail to find a block for round 3 more times, so the peer selector should be re-sorted
+ cps.rankPeer(mockPeer, peerRankNoBlockForRound)
+ oldRank, newRank = cps.rankPeer(mockPeer, peerRankNoBlockForRound)
+
+ require.Equal(t, 10, oldRank)
+ require.Equal(t, peerRankNoBlockForRound, newRank)
+ require.Equal(t, 3, cps.peerSelectors[1].downloadFailures)
+
+ oldRank, newRank = cps.rankPeer(mockPeer, peerRankNoBlockForRound)
+ require.Equal(t, 10, oldRank)
+ require.Equal(t, peerRankNoBlockForRound, newRank)
+ // Note that the download failures should be 0 in this position, as the peer selector should have been re-sorted to last
+ require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 4, cps.peerSelectors[2].downloadFailures)
+
+ // Now, feed a peer that is not in any of the selectors - it should return -1, -1
+ mockPeer2 := &peerSelectorPeer{}
+ oldRank, newRank = cps.rankPeer(mockPeer2, 50)
+ require.Equal(t, -1, oldRank)
+ require.Equal(t, -1, newRank)
+}
+
+func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
+ partitiontest.PartitionTest(t)
+ t.Parallel()
+
+ mockPeer := &peerSelectorPeer{}
+ testDuration := 50 * time.Millisecond
+
+ // Create a class based peer selector initially with the first wrapped peer selector not having the peer,
+ // second one having it, and a third one not having it
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: mockPeerSelector{
+ mockPeerDownloadDurationToRank: func(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+ return peerRankInvalidDownload
+ },
+ },
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: mockPeerSelector{
+ mockPeerDownloadDurationToRank: func(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+ if psp == mockPeer && blockDownloadDuration == testDuration {
+ return peerRank0HighBlockTime
+ }
+ return peerRankInvalidDownload
+ },
+ },
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: mockPeerSelector{
+ mockPeerDownloadDurationToRank: func(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
+ return peerRankInvalidDownload
+ },
+ },
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ }
+ cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
+
+ // The peer is found in the second selector, so the rank should be peerRank0HighBlockTime
+ rank := cps.peerDownloadDurationToRank(mockPeer, testDuration)
+ require.Equal(t, peerRank0HighBlockTime, rank)
+
+ // The peer is not found in any of the selectors, so the rank should be peerRankInvalidDownload
+ mockPeer2 := &peerSelectorPeer{}
+
+ rank = cps.peerDownloadDurationToRank(mockPeer2, testDuration)
+ require.Equal(t, peerRankInvalidDownload, rank)
+}
+
+func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
+ partitiontest.PartitionTest(t)
+ t.Parallel()
+
+ mockPeer := &peerSelectorPeer{}
+
+ // Create a class based peer selector initially with the first wrapped peer selector not having any peers,
+ // second one having a peer, and a third one not having any peers
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return nil, errPeerSelectorNoPeerPoolsAvailable
+ },
+ },
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return mockPeer, nil
+ },
+ },
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return nil, errPeerSelectorNoPeerPoolsAvailable
+ },
+ },
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ }
+
+ cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
+
+ peerResult, err := cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, peerResult, mockPeer)
+
+ // Update selector to not return any peers
+ wrappedPeerSelectors[1].peerSelector = mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return nil, errPeerSelectorNoPeerPoolsAvailable
+ },
+ }
+
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, peerResult)
+ require.Equal(t, errPeerSelectorNoPeerPoolsAvailable, err)
+
+ // Create a class based peer selector initially with all wrapped peer selectors having peers.
+ // The peers should always come from the first one repeatedly since rankings are not changed.
+ mockPeer2 := &peerSelectorPeer{}
+ mockPeer3 := &peerSelectorPeer{}
+
+ wrappedPeerSelectors = []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return mockPeer, nil
+ },
+ },
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return mockPeer2, nil
+ },
+ },
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: mockPeerSelector{
+ mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
+ return mockPeer3, nil
+ },
+ },
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ }
+
+ cps = makeClassBasedPeerSelector(wrappedPeerSelectors)
+
+ // We should always get the peer from the top priority selector since rankings are not updated/list is not re-sorted.
+ for i := 0; i < 10; i++ {
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, peerResult, mockPeer)
+ }
+}
From 7e6ed4f22e66908e09206b763778c393eaefe4ee Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 13:14:32 -0500
Subject: [PATCH 06/15] Simplify logic: remove concept of explicit sorting,
opting instead to check violations of tolerance factor during iteration of
peer selectors in getNextPeer().
---
catchup/classBasedPeerSelector.go | 62 ++++++------------------
catchup/classBasedPeerSelector_test.go | 65 ++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 54 deletions(-)
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 9fecc23e72..4920afd9c0 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -49,7 +49,6 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
c.mu.Lock()
defer c.mu.Unlock()
- peerSelectorSortNeeded := false
oldRank, newRank := -1, -1
for _, wp := range c.peerSelectors {
// See if the peer is in the class, ranking it appropriately if so
@@ -63,55 +62,12 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
wp.downloadFailures++
}
- // If we have failed more than the tolerance factor, we re-sort the slice of peerSelectors
- if wp.downloadFailures > wp.toleranceFactor {
- peerSelectorSortNeeded = true
- }
break
}
- if peerSelectorSortNeeded {
- c.sortPeerSelectors()
- }
-
return oldRank, newRank
}
-// sortPeerSelectors sorts the peerSelectors by tolerance factor violation, and then by priority
-// It should only be called within a locked context
-func (c *classBasedPeerSelector) sortPeerSelectors() {
- psUnderTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
- psOverTolerance := make([]*wrappedPeerSelector, 0, len(c.peerSelectors))
- for _, wp := range c.peerSelectors {
- // If the rankPooledPeerSelector's download failures have not been reset in a while, we reset them
- if time.Since(wp.lastCheckedTime) > lastCheckedDuration {
- wp.downloadFailures = 0
- // Reset again here, so we don't keep resetting the same rankPooledPeerSelector
- wp.lastCheckedTime = time.Now()
- }
-
- if wp.downloadFailures <= wp.toleranceFactor {
- psUnderTolerance = append(psUnderTolerance, wp)
- } else {
- psOverTolerance = append(psOverTolerance, wp)
- }
-
- }
-
- // Sort the two groups by priority
- sortByPriority := func(ps []*wrappedPeerSelector) {
- sort.SliceStable(ps, func(i, j int) bool {
- return ps[i].priority < ps[j].priority
- })
- }
-
- sortByPriority(psUnderTolerance)
- sortByPriority(psOverTolerance)
-
- //Append the two groups back together
- c.peerSelectors = append(psUnderTolerance, psOverTolerance...)
-}
-
func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPeer, blockDownloadDuration time.Duration) (rank int) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -133,6 +89,14 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
c.mu.Lock()
defer c.mu.Unlock()
for _, wp := range c.peerSelectors {
+ if time.Since(wp.lastCheckedTime) > lastCheckedDuration {
+ wp.downloadFailures = 0
+ }
+
+ if wp.downloadFailures > wp.toleranceFactor {
+ // peerSelector is disabled for now, we move to the next one
+ continue
+ }
psp, err = wp.peerSelector.getNextPeer()
wp.lastCheckedTime = time.Now()
if err != nil {
@@ -146,14 +110,14 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
return psp, nil
}
// If we reached here, we have exhausted all classes and still have no peers
- return nil, err
+ return nil, errPeerSelectorNoPeerPoolsAvailable
}
type wrappedPeerSelector struct {
- peerSelector peerSelector // The underlying rankPooledPeerSelector for this class
- peerClass network.PeerOption // The class of peers the rankPooledPeerSelector is responsible for
+ peerSelector peerSelector // The underlying peerSelector for this class
+ peerClass network.PeerOption // The class of peers the peerSelector is responsible for
toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's rankPooledPeerSelector
- priority int // The original priority of the rankPooledPeerSelector, used for sorting
+ priority int // The original priority of the peerSelector, used for sorting
downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
- lastCheckedTime time.Time // The last time we tried to use the rankPooledPeerSelector
+ lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index 0c86bddd56..7be847a490 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -127,7 +127,7 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
require.Equal(t, peerRankNoBlockForRound, newRank)
require.Equal(t, 1, cps.peerSelectors[1].downloadFailures)
- // We fail to find a block for round 3 more times, so the peer selector should be re-sorted
+ // We fail to find a block for round 3 more times, download failures should reflect that.
cps.rankPeer(mockPeer, peerRankNoBlockForRound)
oldRank, newRank = cps.rankPeer(mockPeer, peerRankNoBlockForRound)
@@ -138,15 +138,17 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
oldRank, newRank = cps.rankPeer(mockPeer, peerRankNoBlockForRound)
require.Equal(t, 10, oldRank)
require.Equal(t, peerRankNoBlockForRound, newRank)
- // Note that the download failures should be 0 in this position, as the peer selector should have been re-sorted to last
- require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
- require.Equal(t, 4, cps.peerSelectors[2].downloadFailures)
+ require.Equal(t, 4, cps.peerSelectors[1].downloadFailures)
// Now, feed a peer that is not in any of the selectors - it should return -1, -1
mockPeer2 := &peerSelectorPeer{}
oldRank, newRank = cps.rankPeer(mockPeer2, 50)
require.Equal(t, -1, oldRank)
require.Equal(t, -1, newRank)
+
+ // Last sanity check, we should have zero download failures for the first and third selectors
+ require.Equal(t, 0, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
}
func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
@@ -282,6 +284,12 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
return mockPeer, nil
},
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ if psp == mockPeer {
+ return 10, rank
+ }
+ return -1, -1
+ },
},
priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
@@ -293,9 +301,15 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
return mockPeer2, nil
},
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ if psp == mockPeer2 {
+ return 10, rank
+ }
+ return -1, -1
+ },
},
priority: peerRankInitialSecondPriority,
- toleranceFactor: 3,
+ toleranceFactor: 10,
lastCheckedTime: time.Now(),
},
{
@@ -304,6 +318,12 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
mockGetNextPeer: func() (psp *peerSelectorPeer, err error) {
return mockPeer3, nil
},
+ mockRankPeer: func(psp *peerSelectorPeer, rank int) (int, int) {
+ if psp == mockPeer3 {
+ return 10, rank
+ }
+ return -1, -1
+ },
},
priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
@@ -319,4 +339,39 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
require.Nil(t, err)
require.Equal(t, peerResult, mockPeer)
}
+
+ // Okay, record enough download failures to disable the first selector
+ for i := 0; i < 4; i++ {
+ cps.rankPeer(mockPeer, peerRankNoBlockForRound)
+ }
+
+ // Now, we should get the peer from the second selector
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, peerResult, mockPeer2)
+
+ // Sanity check the download failures for each selector
+ require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
+
+ // Now, record download failures just under the tolerance factor for the second selector
+ for i := 0; i < 9; i++ {
+ cps.rankPeer(mockPeer2, peerRankNoBlockForRound)
+ }
+
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, peerResult, mockPeer2)
+
+ // One more should push us to the third selector
+ cps.rankPeer(mockPeer2, peerRankNoBlockForRound)
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, peerResult, mockPeer3)
+
+ // Final sanity check of the download failures for each selector
+ require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 10, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
}
From 9d4fa549320e6207b18bc41db17b8da542090bd9 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 15:54:55 -0500
Subject: [PATCH 07/15] Update catchpointService to use the same
classBasedPeerSelector for all of it's download functionality (implying that
failures around catchpoint downloads for a peer from a class put that class
at risk of being disabled temporarily).
---
catchup/catchpointService.go | 10 ++++------
catchup/classBasedPeerSelector_test.go | 16 ++++++++++++++++
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/catchup/catchpointService.go b/catchup/catchpointService.go
index 2c64f408aa..af08a426d9 100644
--- a/catchup/catchpointService.go
+++ b/catchup/catchpointService.go
@@ -292,7 +292,6 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
}
// download balances file.
- ps := cs.makeCatchpointPeerSelector()
lf := makeLedgerFetcher(cs.net, cs.ledgerAccessor, cs.log, cs, cs.config)
attemptsCount := 0
@@ -306,7 +305,7 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
}
return cs.abort(fmt.Errorf("processStageLedgerDownload failed to reset staging balances : %v", err1))
}
- psp, err1 := ps.getNextPeer()
+ psp, err1 := cs.blocksDownloadPeerSelector.getNextPeer()
if err1 != nil {
err1 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup was unable to obtain a list of peers to retrieve the catchpoint file from")
return cs.abort(err1)
@@ -323,9 +322,9 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
break
}
// failed to build the merkle trie for the above catchpoint file.
- ps.rankPeer(psp, peerRankInvalidDownload)
+ cs.blocksDownloadPeerSelector.rankPeer(psp, peerRankInvalidDownload)
} else {
- ps.rankPeer(psp, peerRankDownloadFailed)
+ cs.blocksDownloadPeerSelector.rankPeer(psp, peerRankDownloadFailed)
}
// instead of testing for err == cs.ctx.Err() , we'll check on the context itself.
@@ -838,10 +837,9 @@ func (cs *CatchpointCatchupService) checkLedgerDownload() error {
if err != nil {
return fmt.Errorf("failed to parse catchpoint label : %v", err)
}
- ps := cs.makeCatchpointPeerSelector()
ledgerFetcher := makeLedgerFetcher(cs.net, cs.ledgerAccessor, cs.log, cs, cs.config)
for i := 0; i < cs.config.CatchupLedgerDownloadRetryAttempts; i++ {
- psp, peerError := ps.getNextPeer()
+ psp, peerError := cs.blocksDownloadPeerSelector.getNextPeer()
if peerError != nil {
return err
}
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index 7be847a490..3589590006 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -1,3 +1,19 @@
+// Copyright (C) 2019-2024 Algorand, Inc.
+// This file is part of go-algorand
+//
+// go-algorand is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// go-algorand is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with go-algorand. If not, see .
+
package catchup
import (
From c96b2e1f54efce4351a7f5bf1fd9af8f60df5b04 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 16:32:20 -0500
Subject: [PATCH 08/15] Check against the peerClass when preparing to query for
ranking a peer.
---
catchup/classBasedPeerSelector.go | 7 +++-
catchup/classBasedPeerSelector_test.go | 51 +++++++++++++++++++-------
2 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 4920afd9c0..13d16e075e 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -52,11 +52,16 @@ func (c *classBasedPeerSelector) rankPeer(psp *peerSelectorPeer, rank int) (int,
oldRank, newRank := -1, -1
for _, wp := range c.peerSelectors {
// See if the peer is in the class, ranking it appropriately if so
+ if psp.peerClass != wp.peerClass {
+ continue
+ }
+
oldRank, newRank = wp.peerSelector.rankPeer(psp, rank)
if oldRank < 0 || newRank < 0 {
- // Peer not found in this class
+ // Peer not found in this selector
continue
}
+
// Peer was in this class, if there was any kind of download issue, we increment the failure count
if rank >= peerRankNoBlockForRound {
wp.downloadFailures++
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index 3589590006..b546071adc 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -85,7 +85,9 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
partitiontest.PartitionTest(t)
t.Parallel()
- mockPeer := &peerSelectorPeer{}
+ mockPeer := &peerSelectorPeer{
+ peerClass: network.PeersPhonebookRelays,
+ }
// Create a class based peer selector initially with the first wrapped peer selector not having the peer,
// second one having it, and a third one not having it
@@ -156,12 +158,24 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
require.Equal(t, peerRankNoBlockForRound, newRank)
require.Equal(t, 4, cps.peerSelectors[1].downloadFailures)
- // Now, feed a peer that is not in any of the selectors - it should return -1, -1
- mockPeer2 := &peerSelectorPeer{}
+ // Now, feed peers that are not in any of the selectors - it should return -1, -1
+ mockPeer2 := &peerSelectorPeer{
+ peerClass: network.PeersConnectedIn,
+ }
+
oldRank, newRank = cps.rankPeer(mockPeer2, 50)
require.Equal(t, -1, oldRank)
require.Equal(t, -1, newRank)
+ // While this will match class, the selectors will not have it
+ mockPeer3 := &peerSelectorPeer{
+ peerClass: network.PeersConnectedOut,
+ }
+
+ oldRank, newRank = cps.rankPeer(mockPeer3, 50)
+ require.Equal(t, -1, oldRank)
+ require.Equal(t, -1, newRank)
+
// Last sanity check, we should have zero download failures for the first and third selectors
require.Equal(t, 0, cps.peerSelectors[0].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
@@ -231,7 +245,9 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
partitiontest.PartitionTest(t)
t.Parallel()
- mockPeer := &peerSelectorPeer{}
+ mockPeer := &peerSelectorPeer{
+ peerClass: network.PeersPhonebookRelays,
+ }
// Create a class based peer selector initially with the first wrapped peer selector not having any peers,
// second one having a peer, and a third one not having any peers
@@ -275,7 +291,7 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
peerResult, err := cps.getNextPeer()
require.Nil(t, err)
- require.Equal(t, peerResult, mockPeer)
+ require.Equal(t, mockPeer, peerResult)
// Update selector to not return any peers
wrappedPeerSelectors[1].peerSelector = mockPeerSelector{
@@ -290,8 +306,15 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
// Create a class based peer selector initially with all wrapped peer selectors having peers.
// The peers should always come from the first one repeatedly since rankings are not changed.
- mockPeer2 := &peerSelectorPeer{}
- mockPeer3 := &peerSelectorPeer{}
+ mockPeer = &peerSelectorPeer{
+ peerClass: network.PeersConnectedOut,
+ }
+ mockPeer2 := &peerSelectorPeer{
+ peerClass: network.PeersPhonebookRelays,
+ }
+ mockPeer3 := &peerSelectorPeer{
+ peerClass: network.PeersPhonebookArchivalNodes,
+ }
wrappedPeerSelectors = []*wrappedPeerSelector{
{
@@ -353,7 +376,7 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
for i := 0; i < 10; i++ {
peerResult, err = cps.getNextPeer()
require.Nil(t, err)
- require.Equal(t, peerResult, mockPeer)
+ require.Equal(t, mockPeer, peerResult)
}
// Okay, record enough download failures to disable the first selector
@@ -364,30 +387,30 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
// Now, we should get the peer from the second selector
peerResult, err = cps.getNextPeer()
require.Nil(t, err)
- require.Equal(t, peerResult, mockPeer2)
+ require.Equal(t, mockPeer2, peerResult)
// Sanity check the download failures for each selector
require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
- // Now, record download failures just under the tolerance factor for the second selector
- for i := 0; i < 9; i++ {
+ // Now, record download failures just up to the tolerance factor for the second selector
+ for i := 0; i < 10; i++ {
cps.rankPeer(mockPeer2, peerRankNoBlockForRound)
}
peerResult, err = cps.getNextPeer()
require.Nil(t, err)
- require.Equal(t, peerResult, mockPeer2)
+ require.Equal(t, mockPeer2, peerResult)
// One more should push us to the third selector
cps.rankPeer(mockPeer2, peerRankNoBlockForRound)
peerResult, err = cps.getNextPeer()
require.Nil(t, err)
- require.Equal(t, peerResult, mockPeer3)
+ require.Equal(t, mockPeer3, peerResult)
// Final sanity check of the download failures for each selector
require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
- require.Equal(t, 10, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 11, cps.peerSelectors[1].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
}
From 7de8a2fa11a5ffb9b56ff4c0b9e76472adbf8026 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 16:47:03 -0500
Subject: [PATCH 09/15] Tweak err naming in .
---
catchup/catchpointService.go | 40 ++++++++++++++++++------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/catchup/catchpointService.go b/catchup/catchpointService.go
index af08a426d9..bfb3cc165e 100644
--- a/catchup/catchpointService.go
+++ b/catchup/catchpointService.go
@@ -285,10 +285,10 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
cs.statsMu.Lock()
label := cs.stats.CatchpointLabel
cs.statsMu.Unlock()
- round, _, err0 := ledgercore.ParseCatchpointLabel(label)
+ round, _, err := ledgercore.ParseCatchpointLabel(label)
- if err0 != nil {
- return cs.abort(fmt.Errorf("processStageLedgerDownload failed to parse label : %v", err0))
+ if err != nil {
+ return cs.abort(fmt.Errorf("processStageLedgerDownload failed to parse label : %v", err))
}
// download balances file.
@@ -298,26 +298,26 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
for {
attemptsCount++
- err1 := cs.ledgerAccessor.ResetStagingBalances(cs.ctx, true)
- if err1 != nil {
+ err0 := cs.ledgerAccessor.ResetStagingBalances(cs.ctx, true)
+ if err0 != nil {
if cs.ctx.Err() != nil {
return cs.stopOrAbort()
}
- return cs.abort(fmt.Errorf("processStageLedgerDownload failed to reset staging balances : %v", err1))
+ return cs.abort(fmt.Errorf("processStageLedgerDownload failed to reset staging balances : %v", err0))
}
- psp, err1 := cs.blocksDownloadPeerSelector.getNextPeer()
- if err1 != nil {
- err1 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup was unable to obtain a list of peers to retrieve the catchpoint file from")
- return cs.abort(err1)
+ psp, err0 := cs.blocksDownloadPeerSelector.getNextPeer()
+ if err0 != nil {
+ err0 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup was unable to obtain a list of peers to retrieve the catchpoint file from")
+ return cs.abort(err0)
}
peer := psp.Peer
start := time.Now()
- err1 = lf.downloadLedger(cs.ctx, peer, round)
- if err1 == nil {
+ err0 = lf.downloadLedger(cs.ctx, peer, round)
+ if err0 == nil {
cs.log.Infof("ledger downloaded in %d seconds", time.Since(start)/time.Second)
start = time.Now()
- err1 = cs.ledgerAccessor.BuildMerkleTrie(cs.ctx, cs.updateVerifiedCounts)
- if err1 == nil {
+ err0 = cs.ledgerAccessor.BuildMerkleTrie(cs.ctx, cs.updateVerifiedCounts)
+ if err0 == nil {
cs.log.Infof("built merkle trie in %d seconds", time.Since(start)/time.Second)
break
}
@@ -335,15 +335,15 @@ func (cs *CatchpointCatchupService) processStageLedgerDownload() error {
}
if attemptsCount >= cs.config.CatchupLedgerDownloadRetryAttempts {
- err1 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup exceeded number of attempts to retrieve ledger")
- return cs.abort(err1)
+ err0 = fmt.Errorf("processStageLedgerDownload: catchpoint catchup exceeded number of attempts to retrieve ledger")
+ return cs.abort(err0)
}
- cs.log.Warnf("unable to download ledger : %v", err1)
+ cs.log.Warnf("unable to download ledger : %v", err0)
}
- err0 = cs.updateStage(ledger.CatchpointCatchupStateLatestBlockDownload)
- if err0 != nil {
- return cs.abort(fmt.Errorf("processStageLedgerDownload failed to update stage to CatchpointCatchupStateLatestBlockDownload : %v", err0))
+ err = cs.updateStage(ledger.CatchpointCatchupStateLatestBlockDownload)
+ if err != nil {
+ return cs.abort(fmt.Errorf("processStageLedgerDownload failed to update stage to CatchpointCatchupStateLatestBlockDownload : %v", err))
}
return nil
}
From db0b17b3e361a8cf838a1d7349cdbd5bf0878e9f Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Thu, 15 Feb 2024 16:59:55 -0500
Subject: [PATCH 10/15] Move makeCatchpointPeerSelector to the
classBasedPeerSelector file.
---
catchup/catchpointService.go | 25 +------------------------
catchup/classBasedPeerSelector.go | 25 +++++++++++++++++++++++++
2 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/catchup/catchpointService.go b/catchup/catchpointService.go
index bfb3cc165e..3c11d0db73 100644
--- a/catchup/catchpointService.go
+++ b/catchup/catchpointService.go
@@ -803,30 +803,7 @@ func (cs *CatchpointCatchupService) updateBlockRetrievalStatistics(acquiredBlock
}
func (cs *CatchpointCatchupService) initDownloadPeerSelector() {
- cs.blocksDownloadPeerSelector = cs.makeCatchpointPeerSelector()
-}
-
-func (cs *CatchpointCatchupService) makeCatchpointPeerSelector() peerSelector {
- wrappedPeerSelectors := []*wrappedPeerSelector{
- {
- peerClass: network.PeersPhonebookRelays,
- peerSelector: makeRankPooledPeerSelector(cs.net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookArchivalNodes,
- peerSelector: makeRankPooledPeerSelector(cs.net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialSecondPriority,
- toleranceFactor: 10,
- lastCheckedTime: time.Now(),
- },
- }
-
- return makeClassBasedPeerSelector(wrappedPeerSelectors)
+ cs.blocksDownloadPeerSelector = makeCatchpointPeerSelector(cs.net)
}
// checkLedgerDownload sends a HEAD request to the ledger endpoint of peers to validate the catchpoint's availability
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index 13d16e075e..cb64c4b9f3 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -126,3 +126,28 @@ type wrappedPeerSelector struct {
downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
+
+// makeCatchpointPeerSelector returns a classBasedPeerSelector that selects peers based on their class and response behavior.
+// These are the preferred configurations for the catchpoint service.
+func makeCatchpointPeerSelector(net peersRetriever) peerSelector {
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ }
+
+ return makeClassBasedPeerSelector(wrappedPeerSelectors)
+}
From 73249b23f0fbd46c6d2c902dd15d025ac50c7fd7 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Tue, 20 Feb 2024 15:43:47 -0500
Subject: [PATCH 11/15] Introduce integration test for classBasedPeerSelector
that uses exercises using actual peer selectors and expected ranking
behavior.
---
catchup/classBasedPeerSelector_test.go | 88 ++++++++++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index b546071adc..570e4be90b 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -414,3 +414,91 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
require.Equal(t, 11, cps.peerSelectors[1].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
}
+
+func TestClassBasedPeerSelector_integration(t *testing.T) {
+ partitiontest.PartitionTest(t)
+ t.Parallel()
+
+ mockP1Peer := mockHTTPPeer{address: "p1"}
+ mockP2Peer := mockHTTPPeer{address: "p2"}
+
+ mockP1WrappedPeer := &peerSelectorPeer{&mockP1Peer, network.PeersPhonebookRelays}
+ mockP2WrappedPeer := &peerSelectorPeer{&mockP2Peer, network.PeersPhonebookArchivalNodes}
+
+ net := makePeersRetrieverStub(func(options ...network.PeerOption) []network.Peer {
+ if len(options) > 0 {
+ switch options[0] {
+ case network.PeersPhonebookRelays:
+ return []network.Peer{&mockP1Peer}
+ case network.PeersPhonebookArchivalNodes:
+ return []network.Peer{&mockP2Peer}
+ default:
+ return []network.Peer{&mockP1Peer, &mockP2Peer}
+ }
+ }
+ return nil
+ })
+ // Create a class based peer selector with a few wrapped peer selectors
+ cps := makeCatchpointPeerSelector(net).(*classBasedPeerSelector)
+
+ // We should get the peer from the first priority selector, PeersPhonebookRelays
+ peerResult, err := cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockP1WrappedPeer, peerResult)
+
+ // Normal expected usage: rank the peer
+ durationRank := cps.peerDownloadDurationToRank(mockP1WrappedPeer, 500)
+ oldRank, newRank := cps.rankPeer(mockP1WrappedPeer, durationRank)
+
+ require.Equal(t, 0, oldRank)
+ require.Equal(t, durationRank, newRank)
+
+ // Let's simulate a few download failures (not enough to disable the selector)
+ for i := 0; i < 3; i++ {
+ expectedOldRank := newRank
+ peerResult, err = cps.getNextPeer()
+
+ require.Nil(t, err)
+ require.Equal(t, mockP1WrappedPeer, peerResult)
+
+ oldRank, newRank = cps.rankPeer(mockP1WrappedPeer, peerRankNoBlockForRound)
+
+ require.Equal(t, expectedOldRank, oldRank)
+ // Should be increasing with no block penalties
+ require.True(t, newRank >= oldRank)
+ }
+
+ // Sanity check, still should be the same peer (from phonebook selector)
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockP1WrappedPeer, peerResult)
+
+ // Rank the peer to follow normal usage
+ durationRank = cps.peerDownloadDurationToRank(mockP1WrappedPeer, 500)
+ expectedOldRank := newRank
+ oldRank, newRank = cps.rankPeer(mockP1WrappedPeer, durationRank)
+
+ require.Equal(t, expectedOldRank, oldRank)
+ // Rank should not go up after successful download
+ require.True(t, newRank <= oldRank)
+
+ // Now, let's simulate enough download failures to disable the first selector
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockP1WrappedPeer, peerResult)
+ cps.rankPeer(mockP1WrappedPeer, peerRankNoBlockForRound)
+
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockP2WrappedPeer, peerResult)
+
+ // Normal expected usage: rank the peer
+ durationRank = cps.peerDownloadDurationToRank(mockP2WrappedPeer, 500)
+ oldRank, newRank = cps.rankPeer(mockP2WrappedPeer, durationRank)
+
+ require.Equal(t, 0, oldRank)
+ require.Equal(t, durationRank, newRank)
+
+ require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
+}
From c30c20355aeb34a9ce16b95c18ca421db04d84e9 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Tue, 20 Feb 2024 16:06:25 -0500
Subject: [PATCH 12/15] Simplify makePeerSelector down to a single priority
ordering of nodes connected out, relays, archival nodes.
---
catchup/service.go | 177 +++++++++-------------------------------
catchup/service_test.go | 134 +-----------------------------
2 files changed, 42 insertions(+), 269 deletions(-)
diff --git a/catchup/service.go b/catchup/service.go
index eb9fc2a57a..89b88a7c96 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -73,7 +73,7 @@ type Ledger interface {
WaitMem(r basics.Round) chan struct{}
}
-// Service represents the catchup service. Once started and until it is stopped, it ensures that the ledger is up to date with network.
+// Service represents the catchup service. Once started and until it is stopped, it ensures that the ledger is up-to-date with network.
type Service struct {
// disableSyncRound, provided externally, is the first round we will _not_ fetch from the network
// any round >= disableSyncRound will not be fetched. If set to 0, it will be disregarded.
@@ -492,7 +492,7 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
}
}()
- ps := createPeerSelector(s.net, s.cfg, true)
+ ps := createPeerSelector(s.net)
if _, err := ps.getNextPeer(); errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
s.log.Debugf("pipelinedFetch: was unable to obtain a peer to retrieve the block from")
return
@@ -752,7 +752,7 @@ func (s *Service) fetchRound(cert agreement.Certificate, verifier *agreement.Asy
peerErrors := map[network.Peer]int{}
blockHash := bookkeeping.BlockHash(cert.Proposal.BlockDigest) // semantic digest (i.e., hash of the block header), not byte-for-byte digest
- ps := createPeerSelector(s.net, s.cfg, false)
+ ps := createPeerSelector(s.net)
for s.ledger.LastRound() < cert.Round {
psp, getPeerErr := ps.getNextPeer()
if getPeerErr != nil {
@@ -784,12 +784,12 @@ func (s *Service) fetchRound(cert agreement.Certificate, verifier *agreement.Asy
time.Sleep(50 * time.Millisecond)
}
if count > errNoBlockForRoundThreshold*10 {
- // for the low number of connected peers (like 2) the following scenatio is possible:
+ // for the low number of connected peers (like 2) the following scenario is possible:
// - both peers do not have the block
// - peer selector punishes one of the peers more than the other
- // - the punoshed peer gets the block, and the less punished peer stucks.
+ // - the punished peer gets the block, and the less punished peer stucks.
// It this case reset the peer selector to let it re-learn priorities.
- ps = createPeerSelector(s.net, s.cfg, false)
+ ps = createPeerSelector(s.net)
}
}
peerErrors[peer]++
@@ -867,137 +867,40 @@ func (s *Service) roundIsNotSupported(nextRound basics.Round) bool {
return true
}
-func createPeerSelector(net network.GossipNode, cfg config.Local, pipelineFetch bool) peerSelector {
- var wrappedPeerSelectors []*wrappedPeerSelector
- //TODO: Revisit this ordering
- if pipelineFetch {
- if cfg.NetAddress != "" && cfg.EnableGossipService { // Relay node
- wrappedPeerSelectors = []*wrappedPeerSelector{
- {
- peerClass: network.PeersConnectedOut,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookRelays,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialSecondPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookArchivalNodes,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialThirdPriority,
- toleranceFactor: 10,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersConnectedIn,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
- priority: peerRankInitialFourthPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- }
- } else {
- wrappedPeerSelectors = []*wrappedPeerSelector{
- {
- peerClass: network.PeersConnectedOut,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookRelays,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialSecondPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookArchivalNodes,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialThirdPriority,
- toleranceFactor: 10,
- lastCheckedTime: time.Now(),
- },
- }
- }
- } else {
- if cfg.NetAddress != "" && cfg.EnableGossipService { // Relay node
- wrappedPeerSelectors = []*wrappedPeerSelector{
- {
- peerClass: network.PeersConnectedOut,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersConnectedIn,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
- priority: peerRankInitialSecondPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookArchivalNodes,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialThirdPriority,
- toleranceFactor: 10,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookRelays,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialFourthPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- }
- } else {
- wrappedPeerSelectors = []*wrappedPeerSelector{
- {
- peerClass: network.PeersConnectedOut,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookArchivalNodes,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialSecondPriority,
- toleranceFactor: 10,
- lastCheckedTime: time.Now(),
- },
- {
- peerClass: network.PeersPhonebookRelays,
- peerSelector: makeRankPooledPeerSelector(net,
- []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialThirdPriority,
- toleranceFactor: 3,
- lastCheckedTime: time.Now(),
- },
- }
- }
+func createPeerSelector(net network.GossipNode) peerSelector {
+ wrappedPeerSelectors := []*wrappedPeerSelector{
+ {
+ peerClass: network.PeersConnectedOut,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
+ priority: peerRankInitialFirstPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookRelays,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
+ priority: peerRankInitialSecondPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersPhonebookArchivalNodes,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
+ priority: peerRankInitialThirdPriority,
+ toleranceFactor: 10,
+ lastCheckedTime: time.Now(),
+ },
+ {
+ peerClass: network.PeersConnectedIn,
+ peerSelector: makeRankPooledPeerSelector(net,
+ []peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
+ priority: peerRankInitialFourthPriority,
+ toleranceFactor: 3,
+ lastCheckedTime: time.Now(),
+ },
}
return makeClassBasedPeerSelector(wrappedPeerSelectors)
diff --git a/catchup/service_test.go b/catchup/service_test.go
index b96aca2933..2f8ce46c1c 100644
--- a/catchup/service_test.go
+++ b/catchup/service_test.go
@@ -958,14 +958,8 @@ func TestCatchupUnmatchedCertificate(t *testing.T) {
func TestCreatePeerSelector(t *testing.T) {
partitiontest.PartitionTest(t)
- // Make Service
- cfg := defaultConfig
-
- // cfg.NetAddress != ""; cfg.EnableGossipService = true; pipelineFetch = true
- cfg.NetAddress = "someAddress"
- cfg.EnableGossipService = true
- s := MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps := createPeerSelector(s.net, s.cfg, true)
+ s := MakeService(logging.Base(), defaultConfig, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
+ ps := createPeerSelector(s.net)
cps, ok := ps.(*classBasedPeerSelector)
require.True(t, ok)
@@ -990,130 +984,6 @@ func TestCreatePeerSelector(t *testing.T) {
require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
require.False(t, cps.peerSelectors[3].lastCheckedTime.IsZero())
-
- // cfg.NetAddress == ""; cfg.EnableGossipService = true; pipelineFetch = true
- cfg.NetAddress = ""
- cfg.EnableGossipService = true
- s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps = createPeerSelector(s.net, s.cfg, true)
-
- cps, ok = ps.(*classBasedPeerSelector)
- require.True(t, ok)
-
- require.Equal(t, 3, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
-
- require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
-
- require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
- require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
- require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
-
- require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
-
- // cfg.NetAddress != ""; cfg.EnableGossipService = false; pipelineFetch = true
- cfg.NetAddress = "someAddress"
- cfg.EnableGossipService = false
- s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps = createPeerSelector(s.net, s.cfg, true)
-
- cps, ok = ps.(*classBasedPeerSelector)
- require.True(t, ok)
-
- require.Equal(t, 3, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
-
- require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
-
- require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
- require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
- require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
-
- require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
-
- // cfg.NetAddress != ""; cfg.EnableGossipService = true; pipelineFetch = false
- cfg.NetAddress = "someAddress"
- cfg.EnableGossipService = true
- s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps = createPeerSelector(s.net, s.cfg, false)
-
- cps, ok = ps.(*classBasedPeerSelector)
- require.True(t, ok)
-
- require.Equal(t, 4, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
- require.Equal(t, peerRankInitialFourthPriority, cps.peerSelectors[3].priority)
-
- require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
- require.Equal(t, network.PeersConnectedIn, cps.peerSelectors[1].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[2].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[3].peerClass)
-
- // cfg.NetAddress == ""; cfg.EnableGossipService = true; pipelineFetch = false
- cfg.NetAddress = ""
- cfg.EnableGossipService = true
- s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps = createPeerSelector(s.net, s.cfg, false)
-
- cps, ok = ps.(*classBasedPeerSelector)
- require.True(t, ok)
-
- require.Equal(t, 3, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
-
- require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[2].peerClass)
-
- require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
- require.Equal(t, 10, cps.peerSelectors[1].toleranceFactor)
- require.Equal(t, 3, cps.peerSelectors[2].toleranceFactor)
-
- require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
-
- // cfg.NetAddress != ""; cfg.EnableGossipService = false; pipelineFetch = false
- cfg.NetAddress = "someAddress"
- cfg.EnableGossipService = false
- s = MakeService(logging.Base(), cfg, &httpTestPeerSource{}, new(mockedLedger), &mockedAuthenticator{errorRound: int(0 + 1)}, nil, nil)
- ps = createPeerSelector(s.net, s.cfg, false)
-
- cps, ok = ps.(*classBasedPeerSelector)
- require.True(t, ok)
-
- require.Equal(t, 3, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
-
- require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
- require.Equal(t, network.PeersPhonebookArchivalNodes, cps.peerSelectors[1].peerClass)
- require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[2].peerClass)
-
- require.Equal(t, 3, cps.peerSelectors[0].toleranceFactor)
- require.Equal(t, 10, cps.peerSelectors[1].toleranceFactor)
- require.Equal(t, 3, cps.peerSelectors[2].toleranceFactor)
-
- require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
}
func TestServiceStartStop(t *testing.T) {
From 6278b854052b1feaec7651a0b83990659122c930 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Tue, 20 Feb 2024 16:09:24 -0500
Subject: [PATCH 13/15] Adjustments from CR.
---
catchup/service.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/catchup/service.go b/catchup/service.go
index 89b88a7c96..3dc07edfc1 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -454,7 +454,7 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
return false
case errors.As(err, &err1):
if !s.protocolErrorLogged {
- logging.Base().Errorf("fetchAndWrite(%v): unrecoverable protocol err1 detected: %v", r, err)
+ logging.Base().Errorf("fetchAndWrite(%v): unrecoverable protocol error detected: %v", r, err)
s.protocolErrorLogged = true
}
default:
@@ -493,7 +493,7 @@ func (s *Service) pipelinedFetch(seedLookback uint64) {
}()
ps := createPeerSelector(s.net)
- if _, err := ps.getNextPeer(); errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
+ if _, err := ps.getNextPeer(); err != nil {
s.log.Debugf("pipelinedFetch: was unable to obtain a peer to retrieve the block from")
return
}
From 9a2390c834a5df3db8a884840cdcc68bb72711d3 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Wed, 21 Feb 2024 13:16:58 -0500
Subject: [PATCH 14/15] Remove priority field from classBasedPeerSelector
(priority determined by order of wrappedPeerSelectors at construction.
---
catchup/classBasedPeerSelector.go | 9 +--------
catchup/classBasedPeerSelector_test.go | 26 +++++---------------------
catchup/service.go | 8 ++------
catchup/service_test.go | 4 ----
4 files changed, 8 insertions(+), 39 deletions(-)
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index cb64c4b9f3..f40a69abec 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -20,7 +20,6 @@ import (
"errors"
"github.com/algorand/go-algorand/network"
"github.com/algorand/go-deadlock"
- "sort"
"time"
)
@@ -30,16 +29,13 @@ const lastCheckedDuration = 10 * time.Minute
// classBasedPeerSelector is a rankPooledPeerSelector that tracks and ranks classes of peers based on their response behavior.
// It is used to select the most appropriate peers to download blocks from - this is most useful when catching up
// and needing to figure out whether the blocks can be retrieved from relay nodes or require archive nodes.
+// The ordering of the peerSelectors directly determines the priority of the classes of peers.
type classBasedPeerSelector struct {
mu deadlock.Mutex
peerSelectors []*wrappedPeerSelector
}
func makeClassBasedPeerSelector(peerSelectors []*wrappedPeerSelector) *classBasedPeerSelector {
- // Sort the peerSelectors by priority
- sort.SliceStable(peerSelectors, func(i, j int) bool {
- return peerSelectors[i].priority < peerSelectors[j].priority
- })
return &classBasedPeerSelector{
peerSelectors: peerSelectors,
}
@@ -122,7 +118,6 @@ type wrappedPeerSelector struct {
peerSelector peerSelector // The underlying peerSelector for this class
peerClass network.PeerOption // The class of peers the peerSelector is responsible for
toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's rankPooledPeerSelector
- priority int // The original priority of the peerSelector, used for sorting
downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
@@ -135,7 +130,6 @@ func makeCatchpointPeerSelector(net peersRetriever) peerSelector {
peerClass: network.PeersPhonebookRelays,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -143,7 +137,6 @@ func makeCatchpointPeerSelector(net peersRetriever) peerSelector {
peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialSecondPriority,
toleranceFactor: 10,
lastCheckedTime: time.Now(),
},
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index 570e4be90b..74931eb66d 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -47,27 +47,23 @@ func TestClassBasedPeerSelector_makeClassBasedPeerSelector(t *testing.T) {
partitiontest.PartitionTest(t)
t.Parallel()
- // Intentionally put the selectors in non-priority order
wrappedPeerSelectors := []*wrappedPeerSelector{
{
- peerClass: network.PeersConnectedOut,
+ peerClass: network.PeersPhonebookRelays,
peerSelector: mockPeerSelector{},
- priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
{
- peerClass: network.PeersPhonebookArchivalNodes,
+ peerClass: network.PeersConnectedOut,
peerSelector: mockPeerSelector{},
- priority: peerRankInitialThirdPriority,
- toleranceFactor: 10,
+ toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
{
- peerClass: network.PeersPhonebookRelays,
+ peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: mockPeerSelector{},
- priority: peerRankInitialFirstPriority,
- toleranceFactor: 3,
+ toleranceFactor: 10,
lastCheckedTime: time.Now(),
},
}
@@ -99,7 +95,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -113,7 +108,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -124,7 +118,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -198,7 +191,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
return peerRankInvalidDownload
},
},
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -212,7 +204,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
return peerRankInvalidDownload
},
},
- priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -223,7 +214,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
return peerRankInvalidDownload
},
},
- priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -259,7 +249,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return nil, errPeerSelectorNoPeerPoolsAvailable
},
},
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -270,7 +259,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return mockPeer, nil
},
},
- priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -281,7 +269,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return nil, errPeerSelectorNoPeerPoolsAvailable
},
},
- priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -330,7 +317,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -347,7 +333,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialSecondPriority,
toleranceFactor: 10,
lastCheckedTime: time.Now(),
},
@@ -364,7 +349,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
return -1, -1
},
},
- priority: peerRankInitialThirdPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
diff --git a/catchup/service.go b/catchup/service.go
index 3dc07edfc1..f8142eb97a 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -442,7 +442,7 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
if err != nil {
var errNonSequentialBlockEval ledgercore.ErrNonSequentialBlockEval
var blockInLedgerError ledgercore.BlockInLedgerError
- var err1 protocol.Error
+ var protocolErr protocol.Error
switch {
case errors.As(err, &errNonSequentialBlockEval):
s.log.Infof("fetchAndWrite(%d): no need to re-evaluate historical block", r)
@@ -452,7 +452,7 @@ func (s *Service) fetchAndWrite(ctx context.Context, r basics.Round, prevFetchCo
// only the agreement could have added this block into the ledger, catchup is complete
s.log.Infof("fetchAndWrite(%d): after fetching the block, it is already in the ledger. The catchup is complete", r)
return false
- case errors.As(err, &err1):
+ case errors.As(err, &protocolErr):
if !s.protocolErrorLogged {
logging.Base().Errorf("fetchAndWrite(%v): unrecoverable protocol error detected: %v", r, err)
s.protocolErrorLogged = true
@@ -873,7 +873,6 @@ func createPeerSelector(net network.GossipNode) peerSelector {
peerClass: network.PeersConnectedOut,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
- priority: peerRankInitialFirstPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -881,7 +880,6 @@ func createPeerSelector(net network.GossipNode) peerSelector {
peerClass: network.PeersPhonebookRelays,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
- priority: peerRankInitialSecondPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
@@ -889,7 +887,6 @@ func createPeerSelector(net network.GossipNode) peerSelector {
peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
- priority: peerRankInitialThirdPriority,
toleranceFactor: 10,
lastCheckedTime: time.Now(),
},
@@ -897,7 +894,6 @@ func createPeerSelector(net network.GossipNode) peerSelector {
peerClass: network.PeersConnectedIn,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
- priority: peerRankInitialFourthPriority,
toleranceFactor: 3,
lastCheckedTime: time.Now(),
},
diff --git a/catchup/service_test.go b/catchup/service_test.go
index 2f8ce46c1c..22a09ade57 100644
--- a/catchup/service_test.go
+++ b/catchup/service_test.go
@@ -965,10 +965,6 @@ func TestCreatePeerSelector(t *testing.T) {
require.True(t, ok)
require.Equal(t, 4, len(cps.peerSelectors))
- require.Equal(t, peerRankInitialFirstPriority, cps.peerSelectors[0].priority)
- require.Equal(t, peerRankInitialSecondPriority, cps.peerSelectors[1].priority)
- require.Equal(t, peerRankInitialThirdPriority, cps.peerSelectors[2].priority)
- require.Equal(t, peerRankInitialFourthPriority, cps.peerSelectors[3].priority)
require.Equal(t, network.PeersConnectedOut, cps.peerSelectors[0].peerClass)
require.Equal(t, network.PeersPhonebookRelays, cps.peerSelectors[1].peerClass)
From c73fa06978cda3207c6a269e61ed145e04172ac7 Mon Sep 17 00:00:00 2001
From: Gary Malouf <982483+gmalouf@users.noreply.github.com>
Date: Wed, 21 Feb 2024 13:53:14 -0500
Subject: [PATCH 15/15] Replace resetting of download failures based on time
with logic that resets when all selectors have been disabled.
---
catchup/classBasedPeerSelector.go | 32 +++++++++++++-------
catchup/classBasedPeerSelector_test.go | 42 ++++++++++++++++----------
catchup/service.go | 4 ---
catchup/service_test.go | 5 ---
4 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/catchup/classBasedPeerSelector.go b/catchup/classBasedPeerSelector.go
index f40a69abec..9ab9e6d71d 100644
--- a/catchup/classBasedPeerSelector.go
+++ b/catchup/classBasedPeerSelector.go
@@ -23,9 +23,6 @@ import (
"time"
)
-// The duration after which we reset the downloadFailures for a rankPooledPeerSelector
-const lastCheckedDuration = 10 * time.Minute
-
// classBasedPeerSelector is a rankPooledPeerSelector that tracks and ranks classes of peers based on their response behavior.
// It is used to select the most appropriate peers to download blocks from - this is most useful when catching up
// and needing to figure out whether the blocks can be retrieved from relay nodes or require archive nodes.
@@ -89,17 +86,24 @@ func (c *classBasedPeerSelector) peerDownloadDurationToRank(psp *peerSelectorPee
func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error) {
c.mu.Lock()
defer c.mu.Unlock()
- for _, wp := range c.peerSelectors {
- if time.Since(wp.lastCheckedTime) > lastCheckedDuration {
- wp.downloadFailures = 0
- }
+ return c.internalGetNextPeer(0)
+}
+// internalGetNextPeer is a helper function that should be called with the lock held
+func (c *classBasedPeerSelector) internalGetNextPeer(recurseCount int8) (psp *peerSelectorPeer, err error) {
+ // Safety check to prevent infinite recursion
+ if recurseCount > 1 {
+ return nil, errPeerSelectorNoPeerPoolsAvailable
+ }
+ selectorDisabledCount := 0
+ for _, wp := range c.peerSelectors {
if wp.downloadFailures > wp.toleranceFactor {
// peerSelector is disabled for now, we move to the next one
+ selectorDisabledCount++
continue
}
psp, err = wp.peerSelector.getNextPeer()
- wp.lastCheckedTime = time.Now()
+
if err != nil {
// This is mostly just future-proofing, as we don't expect any other errors from getNextPeer
if errors.Is(err, errPeerSelectorNoPeerPoolsAvailable) {
@@ -111,6 +115,15 @@ func (c *classBasedPeerSelector) getNextPeer() (psp *peerSelectorPeer, err error
return psp, nil
}
// If we reached here, we have exhausted all classes and still have no peers
+ // IFF all classes are disabled, we reset the downloadFailures for all classes and start over
+ if len(c.peerSelectors) != 0 && selectorDisabledCount == len(c.peerSelectors) {
+ for _, wp := range c.peerSelectors {
+ wp.downloadFailures = 0
+ }
+ // Recurse to try again, we should have at least one class enabled now
+ return c.internalGetNextPeer(recurseCount + 1)
+ }
+ // If we reached here, we have exhausted all classes without finding a peer, not due to all classes being disabled
return nil, errPeerSelectorNoPeerPoolsAvailable
}
@@ -119,7 +132,6 @@ type wrappedPeerSelector struct {
peerClass network.PeerOption // The class of peers the peerSelector is responsible for
toleranceFactor int // The number of times we can net fail for any reason before we move to the next class's rankPooledPeerSelector
downloadFailures int // The number of times we have failed to download a block from this class's rankPooledPeerSelector since it was last reset
- lastCheckedTime time.Time // The last time we tried to use the peerSelector
}
// makeCatchpointPeerSelector returns a classBasedPeerSelector that selects peers based on their class and response behavior.
@@ -131,14 +143,12 @@ func makeCatchpointPeerSelector(net peersRetriever) peerSelector {
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
toleranceFactor: 10,
- lastCheckedTime: time.Now(),
},
}
diff --git a/catchup/classBasedPeerSelector_test.go b/catchup/classBasedPeerSelector_test.go
index 74931eb66d..0110663f87 100644
--- a/catchup/classBasedPeerSelector_test.go
+++ b/catchup/classBasedPeerSelector_test.go
@@ -52,19 +52,16 @@ func TestClassBasedPeerSelector_makeClassBasedPeerSelector(t *testing.T) {
peerClass: network.PeersPhonebookRelays,
peerSelector: mockPeerSelector{},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersConnectedOut,
peerSelector: mockPeerSelector{},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: mockPeerSelector{},
toleranceFactor: 10,
- lastCheckedTime: time.Now(),
},
}
@@ -96,7 +93,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookRelays,
@@ -109,7 +105,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
@@ -119,7 +114,6 @@ func TestClassBasedPeerSelector_rankPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
}
cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
@@ -192,7 +186,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookRelays,
@@ -205,7 +198,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
@@ -215,7 +207,6 @@ func TestClassBasedPeerSelector_peerDownloadDurationToRank(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
}
cps := makeClassBasedPeerSelector(wrappedPeerSelectors)
@@ -250,7 +241,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookRelays,
@@ -260,7 +250,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
@@ -270,7 +259,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
}
@@ -318,7 +306,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookRelays,
@@ -334,7 +321,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 10,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
@@ -350,7 +336,6 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
},
},
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
}
@@ -393,10 +378,35 @@ func TestClassBasedPeerSelector_getNextPeer(t *testing.T) {
require.Nil(t, err)
require.Equal(t, mockPeer3, peerResult)
- // Final sanity check of the download failures for each selector
+ // Check of the download failures for each selector
require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
require.Equal(t, 11, cps.peerSelectors[1].downloadFailures)
require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
+
+ // Now, record download failures just up to the tolerance factor for the third selector
+ for i := 0; i < 3; i++ {
+ cps.rankPeer(mockPeer3, peerRankNoBlockForRound)
+ }
+
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockPeer3, peerResult)
+
+ require.Equal(t, 4, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 11, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 3, cps.peerSelectors[2].downloadFailures)
+
+ // One more failure should reset ALL download failures (and grab a peer from the first selector)
+ cps.rankPeer(mockPeer3, peerRankNoBlockForRound)
+
+ peerResult, err = cps.getNextPeer()
+ require.Nil(t, err)
+ require.Equal(t, mockPeer, peerResult)
+
+ // Check of the download failures for each selector, should have been reset
+ require.Equal(t, 0, cps.peerSelectors[0].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[1].downloadFailures)
+ require.Equal(t, 0, cps.peerSelectors[2].downloadFailures)
}
func TestClassBasedPeerSelector_integration(t *testing.T) {
diff --git a/catchup/service.go b/catchup/service.go
index f8142eb97a..5c6609b236 100644
--- a/catchup/service.go
+++ b/catchup/service.go
@@ -874,28 +874,24 @@ func createPeerSelector(net network.GossipNode) peerSelector {
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedOut}}),
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookRelays,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookRelays}}),
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersPhonebookArchivalNodes,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersPhonebookArchivalNodes}}),
toleranceFactor: 10,
- lastCheckedTime: time.Now(),
},
{
peerClass: network.PeersConnectedIn,
peerSelector: makeRankPooledPeerSelector(net,
[]peerClass{{initialRank: peerRankInitialFirstPriority, peerClass: network.PeersConnectedIn}}),
toleranceFactor: 3,
- lastCheckedTime: time.Now(),
},
}
diff --git a/catchup/service_test.go b/catchup/service_test.go
index 22a09ade57..045a0438f2 100644
--- a/catchup/service_test.go
+++ b/catchup/service_test.go
@@ -975,11 +975,6 @@ func TestCreatePeerSelector(t *testing.T) {
require.Equal(t, 3, cps.peerSelectors[1].toleranceFactor)
require.Equal(t, 10, cps.peerSelectors[2].toleranceFactor)
require.Equal(t, 3, cps.peerSelectors[3].toleranceFactor)
-
- require.False(t, cps.peerSelectors[0].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[1].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[2].lastCheckedTime.IsZero())
- require.False(t, cps.peerSelectors[3].lastCheckedTime.IsZero())
}
func TestServiceStartStop(t *testing.T) {