Skip to content

Commit

Permalink
improve retry mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
devcorpio committed Feb 22, 2024
1 parent abeeb33 commit 5a0cf94
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 8 deletions.
2 changes: 1 addition & 1 deletion heartbeat/monitors/wrappers/monitorstate/esloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestStatesESLoader(t *testing.T) {

monID := etc.createTestMonitorStateInES(t, testStatus)
// Since we've continued this state it should register the initial state
ms := etc.tracker.GetCurrentState(monID)
ms := etc.tracker.GetCurrentState(monID, RetryConfig{})
require.True(t, ms.StartedAt.After(testStart.Add(-time.Nanosecond)), "timestamp for new state is off")
requireMSStatusCount(t, ms, testStatus, 1)

Expand Down
26 changes: 20 additions & 6 deletions heartbeat/monitors/wrappers/monitorstate/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (t *Tracker) RecordStatus(sf stdfields.StdMonitorFields, newStatus StateSta
t.mtx.Lock()
defer t.mtx.Unlock()

state := t.GetCurrentState(sf)
state := t.GetCurrentState(sf, RetryConfig{})
if state == nil {
state = newMonitorState(sf, newStatus, 0, t.flappingEnabled)
logp.L().Infof("initializing new state for monitor %s: %s", sf.ID, state.String())
Expand All @@ -75,22 +75,32 @@ func (t *Tracker) RecordStatus(sf stdfields.StdMonitorFields, newStatus StateSta
}

// GetCurrentStatus returns the Status field of the state that GetCurrentState
// yields for sf, or StatusEmpty when GetCurrentState returns nil.
// The zero-value RetryConfig is passed, so GetCurrentState applies its default
// number of attempts and default wait between retries.
func (t *Tracker) GetCurrentStatus(sf stdfields.StdMonitorFields) StateStatus {
s := t.GetCurrentState(sf)
s := t.GetCurrentState(sf, RetryConfig{})
if s == nil {
return StatusEmpty
}
return s.Status
}

func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields) (state *State) {
// RetryConfig tunes how GetCurrentState retries the state loader when loading
// fails. The zero value keeps GetCurrentState's built-in defaults.
type RetryConfig struct {
// attempts bounds how many times the state loader is invoked.
// A value of 0 selects the default of 3 attempts.
attempts int
// waitFn, when non-nil, returns how long to sleep before the next attempt.
// When nil, the default wait is used: i*i seconds (i being the zero-based
// attempt index) plus a random jitter below 500 milliseconds.
waitFn func() time.Duration
}

func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields, rc RetryConfig) (state *State) {
if state, ok := t.states[sf.ID]; ok {
return state
}

tries := 3
// Default number of attempts
attempts := 3
if rc.attempts != 0 {
attempts = rc.attempts
}

var loadedState *State
var err error
for i := 0; i < tries; i++ {
for i := 0; i < attempts; i++ {
loadedState, err = t.stateLoader(sf)
if err == nil {
if loadedState != nil {
Expand All @@ -104,12 +114,16 @@ func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields) (state *State)
break
}

// Default sleep time
sleepFor := (time.Duration(i*i) * time.Second) + (time.Duration(rand.Intn(500)) * time.Millisecond)
if rc.waitFn != nil {
sleepFor = rc.waitFn()
}
logp.L().Warnf("could not load last externally recorded state, will retry again in %d milliseconds: %w", sleepFor.Milliseconds(), err)
time.Sleep(sleepFor)
}
if err != nil {
logp.L().Warnf("could not load prior state from elasticsearch after %d attempts, will create new state for monitor: %s", tries, sf.ID)
logp.L().Warnf("could not load prior state from elasticsearch after %d attempts, will create new state for monitor: %s", attempts, sf.ID)
}

if loadedState != nil {
Expand Down
16 changes: 15 additions & 1 deletion heartbeat/monitors/wrappers/monitorstate/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,21 +134,35 @@ func TestDeferredStateLoader(t *testing.T) {
}

func TestStateLoaderRetry(t *testing.T) {
// While testing the sleep time between retries should be negligible
waitFn := func() time.Duration {
return time.Microsecond
}

tests := []struct {
name string
retryable bool
rc RetryConfig
expectedCalls int
}{
{
"should retry 3 times when fails with retryable error",
true,
RetryConfig{waitFn: waitFn},
3,
},
{
"should not retry when fails with non-retryable error",
false,
RetryConfig{waitFn: waitFn},
1,
},
{
"should honour the configured number of attempts when fails with retryable error",
true,
RetryConfig{attempts: 5, waitFn: waitFn},
5,
},
}

for _, tt := range tests {
Expand All @@ -160,7 +174,7 @@ func TestStateLoaderRetry(t *testing.T) {
}

mst := NewTracker(errorStateLoader, true)
mst.GetCurrentState(stdfields.StdMonitorFields{})
mst.GetCurrentState(stdfields.StdMonitorFields{}, tt.rc)

require.Equal(t, calls, tt.expectedCalls)
})
Expand Down

0 comments on commit 5a0cf94

Please sign in to comment.