Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retry logic to UCP GetAWSResourceWithPost handler #8170

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ require (
github.com/sagikazarmark/locafero v0.6.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/sethvargo/go-retry v0.3.0 // indirect
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this package to simplify our retry logic across the project. Looks like it is well tested with no dependencies so I think it is a good choice. Let's discuss in this PR

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Last commit seems to be from 6 months ago. Just wondering if that could be an issue.

github.com/skeema/knownhosts v1.2.2 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,8 @@ github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA=
github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
Expand Down
80 changes: 80 additions & 0 deletions pkg/retry/retry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package retry

import (
"context"
"time"

"github.com/sethvargo/go-retry"
)

const (
defaultInterval = 1 * time.Second
defaultMaxRetries = 10
defaultMaxDuration = 60 * time.Second
)

// RetryConfig is the configuration for a retry operation.
type RetryConfig struct {
// BackoffStrategy is the backoff strategy to use.
BackoffStrategy retry.Backoff
}

// Retryer is a utility for retrying functions.
type Retryer struct {
config *RetryConfig
}

// NewNoOpRetryer creates a new Retryer that does not retry.
func NewNoOpRetryer() *Retryer {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is useful for testing. using this retryer should be the same functionality as we have today.

b := retry.NewConstant(1 * time.Second)
b = retry.WithMaxRetries(0, b)

noOpRetryer := NewRetryer(&RetryConfig{
BackoffStrategy: b,
})

return noOpRetryer
}

func DefaultBackoffStrategy() retry.Backoff {
b := retry.NewExponential(1 * time.Second)
b = retry.WithMaxDuration(defaultMaxDuration, b)
b = retry.WithMaxRetries(defaultMaxRetries, b)

return b
}

func NewDefaultRetryer() *Retryer {
defaultRetryer := NewRetryer(&RetryConfig{
BackoffStrategy: DefaultBackoffStrategy(),
})

return defaultRetryer
}

// NewRetryer creates a new Retryer with the given configuration.
func NewRetryer(config *RetryConfig) *Retryer {
retryConfig := &RetryConfig{}

if config != nil {
if config.BackoffStrategy != nil {
retryConfig.BackoffStrategy = config.BackoffStrategy
}
} else {
retryConfig.BackoffStrategy = retry.NewExponential(defaultInterval)
}

return &Retryer{
config: retryConfig,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can retryConfig ever be just empty? Like config is not nil but config.BackOffStrategy is?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that an okay case?

}
}

// RetryFunc retries the given function until it returns nil or the maximum number of retries is reached.
func (r *Retryer) RetryFunc(ctx context.Context, f func(ctx context.Context) error) error {
return retry.Do(ctx, r.config.BackoffStrategy, f)
}

// RetryableError marks an error as retryable.
func RetryableError(err error) error {
return retry.RetryableError(err)
}
83 changes: 83 additions & 0 deletions pkg/retry/retry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package retry

import (
"context"
"errors"
"testing"
"time"

goretry "github.com/sethvargo/go-retry"
"github.com/stretchr/testify/require"
)

func TestNewNoOpRetryer(t *testing.T) {
retryer := NewNoOpRetryer()
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)

expectedBackoffStrategy := goretry.NewConstant(time.Second * 1)
expectedBackoffStrategy = goretry.WithMaxRetries(0, expectedBackoffStrategy)

require.IsType(t, expectedBackoffStrategy, retryer.config.BackoffStrategy)
}

func TestDefaultBackoffStrategy(t *testing.T) {
backoff := DefaultBackoffStrategy()
require.NotNil(t, backoff)
}

func TestNewDefaultRetryer(t *testing.T) {
retryer := NewDefaultRetryer()
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)

expectedBackoffStrategy := goretry.NewConstant(time.Second * 1)
expectedBackoffStrategy = goretry.WithMaxRetries(0, expectedBackoffStrategy)

require.IsType(t, expectedBackoffStrategy, retryer.config.BackoffStrategy)
}

func TestNewRetryer(t *testing.T) {
config := &RetryConfig{
BackoffStrategy: goretry.NewConstant(1 * time.Second),
}
Comment on lines +42 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can test this with a RetryConfig that has a nil BackOffStrategy.

retryer := NewRetryer(config)
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)

retryer = NewRetryer(nil)
require.NotNil(t, retryer)
require.NotNil(t, retryer.config)
require.NotNil(t, retryer.config.BackoffStrategy)
}

func TestRetryer_RetryFunc(t *testing.T) {
retryer := NewDefaultRetryer()
ctx := context.Background()

// Test successful function
err := retryer.RetryFunc(ctx, func(ctx context.Context) error {
return nil
})
require.NoError(t, err)

// Test retryable error
retryCount := 0
err = retryer.RetryFunc(ctx, func(ctx context.Context) error {
retryCount++
if retryCount < 3 {
return RetryableError(errors.New("retryable error"))
}
return nil
})
require.NoError(t, err)
require.Equal(t, 3, retryCount)

// Test non-retryable error
err = retryer.RetryFunc(ctx, func(ctx context.Context) error {
return errors.New("non-retryable error")
})
require.Error(t, err)
}
3 changes: 2 additions & 1 deletion pkg/ucp/frontend/aws/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/radius-project/radius/pkg/armrpc/frontend/defaultoperation"
"github.com/radius-project/radius/pkg/armrpc/frontend/server"
aztoken "github.com/radius-project/radius/pkg/azure/tokencredentials"
"github.com/radius-project/radius/pkg/retry"
"github.com/radius-project/radius/pkg/ucp"
"github.com/radius-project/radius/pkg/ucp/api/v20231001preview"
ucp_aws "github.com/radius-project/radius/pkg/ucp/aws"
Expand Down Expand Up @@ -225,7 +226,7 @@ func (m *Module) Initialize(ctx context.Context) (http.Handler, error) {
OperationType: &v1.OperationType{Type: OperationTypeAWSResource, Method: v1.OperationGetImperative},
ResourceType: OperationTypeAWSResource,
ControllerFactory: func(opt controller.Options) (controller.Controller, error) {
return awsproxy_ctrl.NewGetAWSResourceWithPost(opt, m.AWSClients)
return awsproxy_ctrl.NewGetAWSResourceWithPost(opt, m.AWSClients, retry.NewDefaultRetryer())
},
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (p *CreateOrUpdateAWSResource) Run(ctx context.Context, w http.ResponseWrit
}

cloudControlOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this function to match the cloudcontrol version

cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}

// Create and update work differently for AWS - we need to know if the resource
// we're working on exists already.
Expand Down Expand Up @@ -125,7 +125,6 @@ func (p *CreateOrUpdateAWSResource) Run(ctx context.Context, w http.ResponseWrit

if existing {
// Get resource type schema

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra space

describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (p *CreateOrUpdateAWSResourceWithPost) Run(ctx context.Context, w http.Resp
}

cloudControlOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}

describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (p *DeleteAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWrit
return armrpc_rest.NewBadRequestARMResponse(e), nil
}

cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}
describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand Down
38 changes: 27 additions & 11 deletions pkg/ucp/frontend/controller/awsproxy/getawsresourcewithpost.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
"github.com/aws/aws-sdk-go-v2/service/cloudcontrol"
"github.com/aws/aws-sdk-go-v2/service/cloudformation"
"github.com/aws/aws-sdk-go-v2/service/cloudformation/types"

"github.com/radius-project/radius/pkg/retry"
)

var _ armrpc_controller.Controller = (*GetAWSResourceWithPost)(nil)
Expand All @@ -44,13 +46,15 @@ var _ armrpc_controller.Controller = (*GetAWSResourceWithPost)(nil)
type GetAWSResourceWithPost struct {
armrpc_controller.Operation[*datamodel.AWSResource, datamodel.AWSResource]
awsClients ucpaws.Clients
retryer *retry.Retryer
}

// NewGetAWSResourceWithPost creates a new GetAWSResourceWithPost controller with the given options and AWS clients.
func NewGetAWSResourceWithPost(opts armrpc_controller.Options, awsClients ucpaws.Clients) (armrpc_controller.Controller, error) {
func NewGetAWSResourceWithPost(opts armrpc_controller.Options, awsClients ucpaws.Clients, retryer *retry.Retryer) (armrpc_controller.Controller, error) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could consider adding the retryer to the other ucp awsproxy routes too, either in the future or this PR. I wanted to get some feedback first

return &GetAWSResourceWithPost{
Operation: armrpc_controller.NewOperation(opts, armrpc_controller.ResourceOptions[datamodel.AWSResource]{}),
awsClients: awsClients,
retryer: retryer,
}, nil
}

Expand All @@ -77,7 +81,7 @@ func (p *GetAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWriter,
return armrpc_rest.NewBadRequestARMResponse(e), nil
}

cloudFormationOpts := []func(*cloudformation.Options){CloudFormationWithRegionOption(region)}
cloudFormationOpts := []func(*cloudformation.Options){CloudFormationRegionOption(region)}
describeTypeOutput, err := p.awsClients.CloudFormation.DescribeType(ctx, &cloudformation.DescribeTypeInput{
Type: types.RegistryTypeResource,
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Expand All @@ -100,15 +104,27 @@ func (p *GetAWSResourceWithPost) Run(ctx context.Context, w http.ResponseWriter,

cloudcontrolOpts := []func(*cloudcontrol.Options){CloudControlRegionOption(region)}
logger.Info("Fetching resource", "resourceType", serviceCtx.ResourceTypeInAWSFormat(), "resourceID", awsResourceIdentifier)
response, err := p.awsClients.CloudControl.GetResource(ctx, &cloudcontrol.GetResourceInput{
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Identifier: aws.String(awsResourceIdentifier),
}, cloudcontrolOpts...)

if ucpaws.IsAWSResourceNotFoundError(err) {
return armrpc_rest.NewNotFoundMessageResponse(constructNotFoundResponseMessage(middleware.GetRelativePath(p.Options().PathBase, req.URL.Path), awsResourceIdentifier)), nil
} else if err != nil {
return ucpaws.HandleAWSError(err)

var response *cloudcontrol.GetResourceOutput
if err := p.retryer.RetryFunc(ctx, func(ctx context.Context) error {
response, err = p.awsClients.CloudControl.GetResource(ctx, &cloudcontrol.GetResourceInput{
TypeName: to.Ptr(serviceCtx.ResourceTypeInAWSFormat()),
Identifier: aws.String(awsResourceIdentifier),
}, cloudcontrolOpts...)

// If the resource is not found, retry.
if ucpaws.IsAWSResourceNotFoundError(err) {
return retry.RetryableError(err)
}

// If any other error occurs, return the error.
return err
}); err != nil {
if ucpaws.IsAWSResourceNotFoundError(err) {
return armrpc_rest.NewNotFoundMessageResponse(constructNotFoundResponseMessage(middleware.GetRelativePath(p.Options().PathBase, req.URL.Path), awsResourceIdentifier)), nil
} else {
return ucpaws.HandleAWSError(err)
}
}

resourceProperties := map[string]any{}
Expand Down
Loading
Loading