Skip to content

Commit

Permalink
new version of CH does not delete api socket (#1706)
Browse files Browse the repository at this point in the history
* new version of CH does not delete api socket

This cleanup is needed so that VMs can be restarted after a crash

* fix ci
  • Loading branch information
muhamadazmy authored May 18, 2022
1 parent 3f87879 commit 933cd3b
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
1 change: 1 addition & 0 deletions pkg/vm/ch.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (

// Run runs the machine with cloud-hypervisor
func (m *Machine) Run(ctx context.Context, socket, logs string) error {
_ = os.Remove(socket)

// build command line
args := map[string][]string{
Expand Down
13 changes: 11 additions & 2 deletions pkg/vm/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,10 +483,19 @@ func (m *Module) waitAndAdjOom(ctx context.Context, name string) error {
return nil
}

ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()

if err := backoff.Retry(check, backoff.WithContext(backoff.NewConstantBackOff(2*time.Second), ctx)); err != nil {
if err := backoff.RetryNotify(
check,
backoff.WithContext(
backoff.NewConstantBackOff(2*time.Second),
ctx,
),
func(err error, d time.Duration) {
log.Debug().Err(err).Str("id", name).Msg("vm is not up yet")
}); err != nil {

return err
}

Expand Down
17 changes: 17 additions & 0 deletions pkg/vm/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"time"

"github.com/patrickmn/go-cache"
Expand Down Expand Up @@ -92,6 +93,8 @@ func (m *Module) monitor(ctx context.Context) error {
return err
}

log.Debug().Int("configured", len(items)).Int("running", len(running)).Msg("vms")

for _, item := range items {
if item.IsDir() {
continue
Expand All @@ -103,7 +106,17 @@ func (m *Module) monitor(ctx context.Context) error {
log.Err(err).Str("id", id).Msg("failed to monitor machine")
}

// remove vm from running vms
delete(running, id)
}

// now we have running vms that shouldn't be running
// because they have no config.
for id, ps := range running {
log.Info().Str("id", id).Msg("machine is running but not configured")
_ = syscall.Kill(ps.Pid, syscall.SIGKILL)
}

return nil
}

Expand Down Expand Up @@ -159,6 +172,10 @@ func (m *Module) monitorID(ctx context.Context, running map[string]Process, id s
if reason == nil {
reason = m.waitAndAdjOom(ctx, id)
}

if reason != nil {
reason = m.withLogs(m.logsPath(id), reason)
}
} else {
reason = fmt.Errorf("deleting vm due to so many crashes")
}
Expand Down

0 comments on commit 933cd3b

Please sign in to comment.