From a28c987f5a604ff66b515dd415270063e6fb069d Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Mon, 10 Oct 2022 15:24:10 +0200 Subject: [PATCH] blocksync: retry requests after timeout (backport #9518) (#9534) * blocksync: retry requests after timeout (#9518) * blocksync: retry requests after timeout * Minimize changes to re-send block request after timeout * TO REVERT: reduce queue capacity * Add reset * Revert "TO REVERT: reduce queue capacity" This reverts commit dd0fee56924c958bed2ab7733e1917eb88fb5957. * 30 seconds * don't reset the timer * Update blocksync/pool.go Co-authored-by: Callum Waters Co-authored-by: Sergio Mena Co-authored-by: Callum Waters (cherry picked from commit a371b1e3a8ea7603ada20e21bd6b4d5bf9f664f2) * Add changelog entry Co-authored-by: William Banfield <4561443+williambanfield@users.noreply.github.com> Co-authored-by: Sergio Mena --- CHANGELOG_PENDING.md | 1 + blockchain/v0/pool.go | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index ae38af93745..151b15d0a3b 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -27,3 +27,4 @@ now been fixed - [p2p] \#9500 prevent peers who have errored being added to the peer_set (@jmalicevic) - [indexer] \#9473 fix bug that caused the psql indexer to index empty blocks whenever one of the transactions returned a non zero code. The relevant deduplication logic has been moved within the kv indexer only (@cmwaters) +- [blocksync] \#9518 handle the case when the sending queue is full: retry block request after a timeout diff --git a/blockchain/v0/pool.go b/blockchain/v0/pool.go index 1328bfa2704..e3923b0d9bc 100644 --- a/blockchain/v0/pool.go +++ b/blockchain/v0/pool.go @@ -32,6 +32,7 @@ const ( maxTotalRequesters = 600 maxPendingRequests = maxTotalRequesters maxPendingRequestsPerPeer = 20 + requestRetrySeconds = 30 // Minimum recv rate to ensure we're receiving blocks from a peer fast // enough. If a peer is not sending us data at at least that rate, we @@ -602,7 +603,7 @@ OUTER_LOOP: } peer = bpr.pool.pickIncrAvailablePeer(bpr.height) if peer == nil { - // log.Info("No peers available", "height", height) + bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height) time.Sleep(requestIntervalMS * time.Millisecond) continue PICK_PEER_LOOP } @@ -612,6 +613,7 @@ OUTER_LOOP: bpr.peerID = peer.id bpr.mtx.Unlock() + to := time.NewTimer(requestRetrySeconds * time.Second) // Send request and wait. bpr.pool.sendRequest(bpr.height, peer.id) WAIT_LOOP: @@ -624,6 +626,11 @@ OUTER_LOOP: return case <-bpr.Quit(): return + case <-to.C: + bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID) + // Simulate a redo + bpr.reset() + continue OUTER_LOOP case peerID := <-bpr.redoCh: if peerID == bpr.peerID { bpr.reset()