Skip to content

Commit

Permalink
remote: retry workspace unlock on retryable errors
Browse files Browse the repository at this point in the history
  • Loading branch information
brandonc committed Dec 11, 2024
1 parent a003a6e commit adf6eae
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 1 deletion.
31 changes: 30 additions & 1 deletion internal/backend/remote/backend_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ type remoteClient struct {
forcePush bool
}

// errorUnlockFailed is used within a retry loop to identify a non-retryable
// workspace unlock error
type errorUnlockFailed struct {
innerError error
}

func (e errorUnlockFailed) FatalError() error {
return e.innerError
}

func (e errorUnlockFailed) Error() string {
return e.innerError.Error()
}

var _ Fatal = errorUnlockFailed{}

// Get the remote state.
func (r *remoteClient) Get() (*remote.Payload, error) {
ctx := context.Background()
Expand Down Expand Up @@ -202,7 +218,20 @@ func (r *remoteClient) Unlock(id string) error {
}

// Unlock the workspace.
_, err := r.client.Workspaces.Unlock(ctx, r.workspace.ID)
// Unlock the workspace.
err := RetryBackoff(ctx, func() error {
_, err := r.client.Workspaces.Unlock(ctx, r.workspace.ID)
if err != nil {
if errors.Is(err, tfe.ErrWorkspaceLockedStateVersionStillPending) {
// This is a retryable error.
return err
}
// This will not be retried
return &errorUnlockFailed{innerError: err}
}
return nil
})

if err != nil {
lockErr.Err = err
return lockErr
Expand Down
43 changes: 43 additions & 0 deletions internal/backend/remote/backend_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ package remote
import (
"bytes"
"os"
"strings"
"testing"

"github.com/hashicorp/terraform/internal/backend"
"github.com/hashicorp/terraform/internal/cloud"
"github.com/hashicorp/terraform/internal/states"
"github.com/hashicorp/terraform/internal/states/remote"
"github.com/hashicorp/terraform/internal/states/statefile"
"github.com/hashicorp/terraform/internal/states/statemgr"
)

func TestRemoteClient_impl(t *testing.T) {
Expand Down Expand Up @@ -41,6 +43,47 @@ func TestRemoteClient_stateLock(t *testing.T) {
remote.TestRemoteLocks(t, s1.(*remote.State).Client, s2.(*remote.State).Client)
}

func TestRemoteClient_Unlock_invalidID(t *testing.T) {
b, bCleanup := testBackendDefault(t)
defer bCleanup()

s1, err := b.StateMgr(backend.DefaultStateName)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}

err = s1.Unlock("no")
if err == nil {
t.Fatal("expected error, got nil")
}

if !strings.Contains(err.Error(), "does not match existing lock ID") {
t.Fatalf("expected erroor containing \"does not match existing lock ID\", got %v", err)
}
}

func TestRemoteClient_Unlock(t *testing.T) {
b, bCleanup := testBackendDefault(t)
defer bCleanup()

s1, err := b.StateMgr(backend.DefaultStateName)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}

id, err := s1.Lock(&statemgr.LockInfo{
ID: "test",
})
if err != nil {
t.Fatalf("expected no error, got %v", err)
}

err = s1.Unlock(id)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
}

func TestRemoteClient_Put_withRunID(t *testing.T) {
// Set the TFE_RUN_ID environment variable before creating the client!
if err := os.Setenv("TFE_RUN_ID", cloud.GenerateID("run-")); err != nil {
Expand Down
101 changes: 101 additions & 0 deletions internal/backend/remote/retry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package remote

import (
"context"
"log"
"sync/atomic"
"time"
)

// Fatal implements a RetryBackoff func return value that, if encountered,
// signals that the func should not be retried. In that case, the error
// returned by the interface method will be returned by RetryBackoff
type Fatal interface {
FatalError() error
}

// NonRetryableError is a simple implementation of Fatal that wraps an error
type NonRetryableError struct {
InnerError error
}

// FatalError returns the inner error, but also implements Fatal, which
// signals to RetryBackoff that a non-retryable error occurred.
func (e NonRetryableError) FatalError() error {
return e.InnerError
}

// Error returns the inner error string
func (e NonRetryableError) Error() string {
return e.InnerError.Error()
}

var (
initialBackoffDelay = time.Second
maxBackoffDelay = 3 * time.Second
)

// RetryBackoff retries function f until nil or a FatalError is returned.
// RetryBackoff only returns an error if the context is in error or if a
// FatalError was encountered.
func RetryBackoff(ctx context.Context, f func() error) error {
// doneCh signals that the routine is done and sends the last error
var doneCh = make(chan struct{})
var errVal atomic.Value
type errWrap struct {
E error
}

go func() {
// the retry delay between each attempt
var delay time.Duration = 0
defer close(doneCh)

for {
select {
case <-ctx.Done():
return
case <-time.After(delay):
}

err := f()
switch e := err.(type) {
case nil:
return
case Fatal:
errVal.Store(errWrap{e.FatalError()})
return
}

delay *= 2
if delay == 0 {
delay = initialBackoffDelay
}

delay = min(delay, maxBackoffDelay)

log.Printf("[WARN] retryable error: %q, delaying for %s", err, delay)
}
}()

// Wait until done or deadline
select {
case <-doneCh:
case <-ctx.Done():
}

err, hadErr := errVal.Load().(errWrap)
var lastErr error
if hadErr {
lastErr = err.E
}

if ctx.Err() != nil {
return ctx.Err()
}

return lastErr
}

0 comments on commit adf6eae

Please sign in to comment.