Skip to content

Commit

Permalink
blocksync: retry requests after timeout (backport tendermint#9518) (tendermint#9534)
Browse files: Browse the repository at this point in the history

* blocksync: retry requests after timeout (tendermint#9518)

* blocksync: retry requests after timeout

* Minimize changes to re-send block request after timeout

* TO REVERT: reduce queue capacity

* Add reset

* Revert "TO REVERT: reduce queue capacity"

This reverts commit dd0fee5.

* 30 seconds

* don't reset the timer

* Update blocksync/pool.go

Co-authored-by: Callum Waters <[email protected]>

Co-authored-by: Sergio Mena <[email protected]>
Co-authored-by: Callum Waters <[email protected]>
(cherry picked from commit a371b1e)

* Add changelog entry

Co-authored-by: William Banfield <[email protected]>
Co-authored-by: Sergio Mena <[email protected]>
  • Loading branch information
3 people authored Oct 10, 2022
1 parent 1d160a5 commit a28c987
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG_PENDING.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
now been fixed
- [p2p] \#9500 prevent peers who have errored being added to the peer_set (@jmalicevic)
- [indexer] \#9473 fix bug that caused the psql indexer to index empty blocks whenever one of the transactions returned a non zero code. The relevant deduplication logic has been moved within the kv indexer only (@cmwaters)
- [blocksync] \#9518 handle the case when the sending queue is full: retry block request after a timeout
9 changes: 8 additions & 1 deletion blockchain/v0/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
maxTotalRequesters = 600
maxPendingRequests = maxTotalRequesters
maxPendingRequestsPerPeer = 20
requestRetrySeconds = 30

// Minimum recv rate to ensure we're receiving blocks from a peer fast
// enough. If a peer is not sending us data at least at that rate, we
Expand Down Expand Up @@ -602,7 +603,7 @@ OUTER_LOOP:
}
peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
if peer == nil {
// log.Info("No peers available", "height", height)
bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
time.Sleep(requestIntervalMS * time.Millisecond)
continue PICK_PEER_LOOP
}
Expand All @@ -612,6 +613,7 @@ OUTER_LOOP:
bpr.peerID = peer.id
bpr.mtx.Unlock()

to := time.NewTimer(requestRetrySeconds * time.Second)
// Send request and wait.
bpr.pool.sendRequest(bpr.height, peer.id)
WAIT_LOOP:
Expand All @@ -624,6 +626,11 @@ OUTER_LOOP:
return
case <-bpr.Quit():
return
case <-to.C:
bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID)
// Simulate a redo
bpr.reset()
continue OUTER_LOOP
case peerID := <-bpr.redoCh:
if peerID == bpr.peerID {
bpr.reset()
Expand Down

0 comments on commit a28c987

Please sign in to comment.