From 376928ca1e9ab901458c25f658176e95621f23b5 Mon Sep 17 00:00:00 2001 From: Andreas Auernhammer Date: Thu, 30 Jan 2025 10:29:28 +0100 Subject: [PATCH] vault: limit token delay to not exceed token TTL (#504) This commit fixes a bug in the Vault token renewal. When KES renews its current token `T1` and receives a new token `T2`, KES waits a certain amount of time before using `T2` to account for replication lag between Vault nodes. Vault tokens are not signed but static secrets. Each Vault node in a dist. cluster needs to know the token before being able to verify it. Without the usage delay described above, KES might send a request to a Vault node that has not received the new token `T2`, yet. Now, KES must also not wait longer then the remaining TTL of the current token `T1`. Otherwise, `T1` expires BEFORE KES starts using `T2`. This results in auth errors like the following: ``` Error making API request.\n\nURL: GET http://127.0.0.1:8200/v1/kv/data/my-key3\nCode: 403. Errors:\n\n* 2 errors occurred:\n\t* permission denied\n\t* invalid token\n\n" req.method=POST req.path=/v1/key/create/my-key3 req.ip=127.0.0.1 req.identity=a49fea12c5d1c69f1eba1e4697e62ccdbe389a80f317191892711b47d83c3e85 ``` This commit limits the max. time KES waits before using the new token `T2` to either half the remaining TTL of `T1` or 30s - whatever is shorter. This ensures that `T1` is still valid once KES switches to `T2`. Signed-off-by: Andreas Auernhammer Co-authored-by: Ramon de Klein --- .github/workflows/go.yml | 13 ++++--------- .github/workflows/release.yml | 3 +-- go.mod | 2 ++ internal/keystore/vault/client.go | 13 ++++++++++--- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 87acfe44..a935032c 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -16,8 +16,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.23.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -34,7 +33,7 @@ jobs: - name: "Set up Go" uses: actions/setup-go@v5 with: - go-version: 1.22.7 + go-version: 1.23.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -54,8 +53,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.23.0 id: go - name: Check out code uses: actions/checkout@v4 @@ -68,14 +66,11 @@ jobs: vulncheck: name: Vulncheck ${{ matrix.go-version }} runs-on: ubuntu-latest - strategy: - matrix: - go-version: [1.22.7, 1.23.1] steps: - name: Set up Go ${{ matrix.go-version }} uses: actions/setup-go@v5 with: - go-version: ${{ matrix.go-version }} + go-version: 1.23.0 - name: Check out code into the Go module directory uses: actions/checkout@v4 - name: Get govulncheck diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index acd9ede5..267c0e27 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,8 +20,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.22.7 - check-latest: true + go-version: 1.23.0 - name: Set up QEMU uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx diff --git a/go.mod b/go.mod index 427047ed..f095d74d 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,8 @@ module github.com/minio/kes go 1.21 +toolchain go1.23.5 + require ( aead.dev/mem v0.2.0 aead.dev/minisign v0.2.1 diff --git a/internal/keystore/vault/client.go b/internal/keystore/vault/client.go index 9df392da..bd6ba750 100644 --- a/internal/keystore/vault/client.go +++ b/internal/keystore/vault/client.go @@ -195,8 +195,10 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * continue } - renewIn := 80 * (ttl / 100) // Renew token after 80% of its TTL has passed - ttl = 0 // Set TTL to zero to trigger an immediate re-authentication in case of auth failure + renewIn := 80 * (ttl / 100) // Renew token after 80% of its TTL has passed + delay := min((ttl-renewIn)/2, Delay) // Delay usage of renewed token but not beyond expiry + ttl = 0 + select { case <-ctx.Done(): return @@ -210,6 +212,9 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * if err == nil { break } + if resp, ok := err.(*vaultapi.ResponseError); ok && resp.StatusCode >= 400 && resp.StatusCode < 500 { + break // Don't retry when we receive a 4xx response + } } if s == nil { s, _ = authenticate(ctx) @@ -225,10 +230,12 @@ func (c *client) RenewToken(ctx context.Context, authenticate authFunc, secret * // Wait before we use the new auth. token. This accounts // for replication lag between the Vault nodes and allows // them to sync the token across the entire cluster. + // However, we must not wait longer than the remaining lifetime + // of the currently used token. select { case <-ctx.Done(): return - case <-time.After(Delay): + case <-time.After(delay): } c.SetToken(token) // SetToken is safe to call from different go routines }