Skip to content

Commit

Permalink
Retry Temporary API Failures in Microcluster (#992)
Browse files Browse the repository at this point in the history
When bootstrapping or starting a cluster, we wait for the k8sd server to be fully ready before interacting with it.
However, there are edge cases—such as during a snap refresh—where the snap attempts to interact with the CLI
(e.g., to configure snap settings) while the database is still initializing.

In these scenarios, immediate failure is unnecessary.
The k8sd client now retries such requests, ensuring smoother operation.

This behavior applies only to specific edge cases where it is known that the microcluster database will eventually become available.
  • Loading branch information
bschimke95 authored Jan 27, 2025
1 parent e7a7e08 commit 283da96
Showing 1 changed file with 37 additions and 5 deletions.
42 changes: 37 additions & 5 deletions src/k8s/pkg/client/k8sd/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,46 @@ import (
"strings"

apiv1 "github.com/canonical/k8s-snap-api/api/v1"
"github.com/canonical/k8s/pkg/log"
"github.com/canonical/k8s/pkg/utils/control"
"github.com/canonical/lxd/shared/api"
"github.com/canonical/microcluster/v2/rest/types"
)

// query is a helper method to wrap common error checking and response handling.
func query[T any](ctx context.Context, c *k8sd, method string, path string, in any, out *T) (T, error) {
if err := c.client.Query(ctx, method, apiv1.K8sdAPIVersion, api.NewURL().Path(strings.Split(path, "/")...), in, out); err != nil {
var zero T
return zero, fmt.Errorf("failed to %s /%s: %w", method, path, err)
// query sends a request to the k8sd API and returns the deserialized response.
//
// The request is issued from inside control.WaitUntilReady. A failure that
// isTemporary recognises is logged and reported as "not ready yet" (false, nil),
// so the request can be attempted again; any other failure is wrapped and
// returned, which ends the wait. NOTE(review): the polling interval and the
// behaviour on context cancellation are defined by control.WaitUntilReady and
// are not visible here.
//
// out must be a non-nil pointer; the response is decoded into it and *out is
// returned on success. On any failure the zero value of T is returned together
// with the error.
func query[T any](ctx context.Context, c *k8sd, method, path string, in any, out *T) (T, error) {
	var result T
	if out == nil {
		return result, fmt.Errorf("out must be a non-nil pointer")
	}

	retryErr := control.WaitUntilReady(ctx, func() (bool, error) {
		err := c.client.Query(ctx, method, apiv1.K8sdAPIVersion, api.NewURL().Path(strings.Split(path, "/")...), in, out)
		if err == nil {
			return true, nil
		}
		if isTemporary(err) {
			// The logger is assumed to be structured (logr-style: a constant message
			// followed by key/value pairs), so the error is attached as a value
			// instead of being interpolated with a printf verb.
			log.FromContext(ctx).Info("Temporary error from k8sd", "error", err)
			return false, nil
		}
		return false, fmt.Errorf("failed to %s /%s: %w", method, path, err)
	})
	if retryErr != nil {
		return result, fmt.Errorf("failed after potential retry: %w", retryErr)
	}

	return *out, nil
}

// isTemporary reports whether err looks like a transient microcluster failure
// that is worth retrying.
// Detection is purely textual: the error message is searched for the known
// microcluster "database not ready" markers. Keep this list specific to
// microcluster messages; generic error checks do not belong here.
// err must be non-nil.
func isTemporary(err error) bool {
	msg := err.Error()
	markers := []string{
		string(types.DatabaseStarting),
		"Database is not yet ready",
	}
	for _, marker := range markers {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}

0 comments on commit 283da96

Please sign in to comment.