Skip to content

Commit

Permalink
fix: No longer delay config-delete messages in the CDS server
Browse files Browse the repository at this point in the history
So far we have added a 5 sec delay before sending our config deletion messages on the CDS server
watchers in order to prevent a race condition where a terminating client receives the config
deletion message from the CDS server before it can actually enter the graceful shutdown cycle,
which causes the immediate dropping of all active client connections. The delay allowed comfortable
time to the client to start graceful shutdown. Unfortunately, the delay also caused unexpected
resets in some hard-to-debug situations, where during adding the Gateway API resources there is a
transient that causes a (delayed) delete due to a missing API resource, which then resets the full
config later when all resources become available.

This change removes the delay in the config-delete path of the CDS server, so all config-delete
updates are immediately sent out. In order to prevent the race condition, the CDS client in
stunnerd ignores delete-config CDS updates all together (but only in stunnerd, the auth-service and
stunnerctl still process them): once started, there is no other way to stop a stunnerd pod than to
delete the Gateway resource.
  • Loading branch information
rg0now committed Nov 28, 2024
1 parent 3451422 commit 032619f
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 132 deletions.
2 changes: 1 addition & 1 deletion cmd/stunnerctl/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func runConfig(_ *cobra.Command, args []string) error {

confChan := make(chan *stnrv1.StunnerConfig, 8)
if watch {
err := cds.Watch(ctx, confChan)
err := cds.Watch(ctx, confChan, false)
if err != nil {
close(confChan)
return err
Expand Down
12 changes: 6 additions & 6 deletions cmd/stunnerd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ func main() {
configOrigin = cdsAddr.Addr
}

log.Infof("Watching configuration at origin %q", configOrigin)
if err := st.WatchConfig(ctx, configOrigin, conf); err != nil {
log.Infof("Watching configuration at origin %q (ignoring delete-config updates)", configOrigin)
if err := st.WatchConfig(ctx, configOrigin, conf, true); err != nil {
log.Errorf("Could not run config watcher: %s", err.Error())
os.Exit(1)
}
Expand Down Expand Up @@ -195,18 +195,18 @@ func main() {
c.Admin.LogLevel = logLevel
}

// we have working stunnerd: reconcile
log.Debug("Initiating reconciliation")
err := st.Reconcile(c)
log.Trace("Reconciliation ready")
if err != nil {

if err := st.Reconcile(c); err != nil {
if e, ok := err.(stnrv1.ErrRestarted); ok {
log.Debugf("Reconciliation ready: %s", e.Error())
} else {
log.Errorf("Could not reconcile new configuration "+
"(running configuration unchanged): %s", err.Error())
}
}

log.Trace("Reconciliation ready")
}
}
}
8 changes: 4 additions & 4 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func (s *Stunner) GetConfig() *stnrv1.StunnerConfig {
return &c
}

// LoadConfig loads a configuration from an origin. This is a shim wrapper around ConfigOrigin.Load.
// LoadConfig loads a configuration from an origin. This is a shim wrapper around configclient.Load.
func (s *Stunner) LoadConfig(origin string) (*stnrv1.StunnerConfig, error) {
client, err := client.New(origin, s.name, s.logger)
if err != nil {
Expand All @@ -153,12 +153,12 @@ func (s *Stunner) LoadConfig(origin string) (*stnrv1.StunnerConfig, error) {
return client.Load()
}

// WatchConfig watches a configuration from an origin. This is a shim wrapper around ConfigOrigin.Watch.
func (s *Stunner) WatchConfig(ctx context.Context, origin string, ch chan<- *stnrv1.StunnerConfig) error {
// WatchConfig watches a configuration from an origin. This is a shim wrapper around configclient.Watch.
func (s *Stunner) WatchConfig(ctx context.Context, origin string, ch chan<- *stnrv1.StunnerConfig, suppressDelete bool) error {
client, err := client.New(origin, s.name, s.logger)
if err != nil {
return err
}

return client.Watch(ctx, ch)
return client.Watch(ctx, ch, suppressDelete)
}
6 changes: 3 additions & 3 deletions config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func TestStunnerConfigFileWatcher(t *testing.T) {
defer cancel()

url := "file://" + file
err = stunner.WatchConfig(ctx, url, conf)
err = stunner.WatchConfig(ctx, url, conf, false)
assert.NoError(t, err, "creating config watcher")

// nothing should happen here: wait a bit so that the watcher has comfortable time to start
Expand Down Expand Up @@ -269,7 +269,7 @@ func TestStunnerConfigFileWatcherMultiVersion(t *testing.T) {
defer cancel()

url := "file://" + file
err = stunner.WatchConfig(ctx, url, conf)
err = stunner.WatchConfig(ctx, url, conf, false)
assert.NoError(t, err, "creating config watcher")

// nothing should happen here: wait a bit so that the watcher has comfortable time to start
Expand Down Expand Up @@ -410,7 +410,7 @@ func TestStunnerConfigPollerMultiVersion(t *testing.T) {
defer close(conf)

log.Debug("init config poller")
assert.NoError(t, stunner.WatchConfig(ctx, origin, conf), "creating config poller")
assert.NoError(t, stunner.WatchConfig(ctx, origin, conf, true), "creating config poller")

c2, ok := <-conf
assert.True(t, ok, "config emitted")
Expand Down
Loading

0 comments on commit 032619f

Please sign in to comment.