Skip to content

Commit

Permalink
Add waits, check, ctx w/ timeout to stop race cond (#1795)
Browse files Browse the repository at this point in the history
There’s potential for flakiness due to timing issues. For example, the test assumes that:
	•	The ClusterRoleBinding and token are immediately effective.
	•	The curl pod becomes ready promptly.
	•	The metrics endpoint is available as soon as the pod is ready.
It seems that all of these assumptions can be covered by retry logic in the validate method.

Signed-off-by: Brett Tofel <[email protected]>
  • Loading branch information
bentito authored Feb 26, 2025
1 parent 6cf1853 commit 1573846
Showing 1 changed file with 38 additions and 3 deletions.
41 changes: 38 additions & 3 deletions test/e2e/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ package e2e

import (
"bytes"
"context"
"fmt"
"io"
"os/exec"
"strings"
"testing"
"time"

"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -156,11 +159,43 @@ func (c *MetricsTestConfig) validate(token string) {
require.Contains(c.t, string(output), "200 OK", "Metrics endpoint did not return 200 OK")
}

// cleanup removes the ClusterRoleBinding and the curl pod created by the test,
// then waits for both deletions to finish.
//
// Every client invocation (the deletes as well as the waits) shares a single
// 60-second deadline, so a stuck command cannot hang the test run. Cleanup is
// best-effort: failures are logged through c.t and never fail the test.
func (c *MetricsTestConfig) cleanup() {
	c.t.Log("Cleaning up resources")

	// One deadline for the whole cleanup; once it expires, any command still
	// running is killed and the remaining steps fail fast.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	// Force-delete both resources. --ignore-not-found keeps the delete from
	// failing when the resource was never created or is already gone.
	deleteBinding := exec.CommandContext(ctx, c.client, "delete", "clusterrolebinding", c.clusterBinding, "--ignore-not-found=true", "--force")
	if out, err := deleteBinding.CombinedOutput(); err != nil {
		c.t.Logf("Error deleting clusterrolebinding: %v, output: %s", err, out)
	}
	deletePod := exec.CommandContext(ctx, c.client, "delete", "pod", c.curlPodName, "-n", c.namespace, "--ignore-not-found=true", "--force")
	if out, err := deletePod.CombinedOutput(); err != nil {
		c.t.Logf("Error deleting pod: %v, output: %s", err, out)
	}

	// Wait for the ClusterRoleBinding to be deleted.
	if err := waitForDeletion(ctx, c.client, "clusterrolebinding", c.clusterBinding); err != nil {
		c.t.Logf("Error waiting for clusterrolebinding deletion: %v", err)
	} else {
		c.t.Log("ClusterRoleBinding deleted")
	}

	// Wait for the Pod to be deleted.
	if err := waitForDeletion(ctx, c.client, "pod", c.curlPodName, "-n", c.namespace); err != nil {
		c.t.Logf("Error waiting for pod deletion: %v", err)
	} else {
		c.t.Log("Pod deleted")
	}
}

// waitForDeletion runs "<client> wait --for=delete <resourceType> <resourceName>"
// and blocks until the command returns: either the resource is reported
// deleted, the command's own 60-second --timeout elapses, or ctx is done and
// the command is killed, whichever happens first.
//
// extraArgs are appended after the resource name (for example "-n", namespace).
//
// A non-nil error wraps the underlying exec error, so callers can inspect it
// with errors.Is / errors.As, and includes the command's combined output.
func waitForDeletion(ctx context.Context, client, resourceType, resourceName string, extraArgs ...string) error {
	// 4 fixed leading arguments + extraArgs + the trailing --timeout flag.
	args := make([]string, 0, len(extraArgs)+5)
	args = append(args, "wait", "--for=delete", resourceType, resourceName)
	args = append(args, extraArgs...)
	args = append(args, "--timeout=60s")

	output, err := exec.CommandContext(ctx, client, args...).CombinedOutput()
	if err != nil {
		// %w (not %v) keeps the exec error reachable through the error chain.
		return fmt.Errorf("error waiting for deletion of %s %s: %w, output: %s", resourceType, resourceName, err, output)
	}
	return nil
}

// getComponentNamespace returns the namespace where operator-controller or catalogd is running
Expand Down

0 comments on commit 1573846

Please sign in to comment.