From eb3a4182eaaa930b90baf38d549225b4711ba593 Mon Sep 17 00:00:00 2001 From: Eslam-Nawara <eslamnawara44@gmail.com> Date: Wed, 29 Jan 2025 15:51:43 +0200 Subject: [PATCH] update all modules to use zos4 stubs and pkg --- cmds/identityd/ssh.go | 11 +- cmds/modules/api_gateway/main.go | 20 +- cmds/modules/noded/main.go | 3 +- cmds/modules/powerd/main.go | 10 +- cmds/modules/provisiond/cap.go | 12 +- cmds/modules/provisiond/events.go | 16 +- cmds/modules/provisiond/main.go | 45 +- cmds/modules/provisiond/migration.go | 4 +- cmds/modules/provisiond/reporter.go | 4 +- cmds/modules/zui/header.go | 6 +- cmds/modules/zui/service.go | 2 +- go.mod | 18 +- go.sum | 28 + pkg/identity.go | 9 +- pkg/identity/identityd.go | 13 +- pkg/power/ethtool.go | 69 ++ pkg/power/ethtool_test.go | 75 ++ pkg/power/power.go | 302 +++++ pkg/power/uptime.go | 129 ++ pkg/primitives/gateway/gatewayfqdn.go | 47 + pkg/primitives/gateway/gatewayname.go | 50 + pkg/primitives/network-light/network.go | 67 ++ pkg/primitives/network/network.go | 71 ++ pkg/primitives/provisioner.go | 40 + pkg/primitives/pubip/public_ip.go | 236 ++++ pkg/primitives/qsfs/qsfs.go | 68 ++ pkg/primitives/statistics.go | 345 ++++++ pkg/primitives/vm-light/gpu.go | 163 +++ pkg/primitives/vm-light/pause.go | 29 + pkg/primitives/vm-light/utils.go | 380 ++++++ pkg/primitives/vm-light/vm.go | 309 +++++ pkg/primitives/vm/gpu.go | 163 +++ pkg/primitives/vm/pause.go | 29 + pkg/primitives/vm/utils.go | 532 +++++++++ pkg/primitives/vm/vm.go | 378 ++++++ pkg/primitives/volume/volume.go | 105 ++ pkg/primitives/zdb/zdb.go | 1118 ++++++++++++++++++ pkg/primitives/zlogs/zlogs.go | 73 ++ pkg/primitives/zmount/zmount.go | 131 +++ pkg/provision.go | 62 + pkg/provision/auth.go | 87 ++ pkg/provision/common/doc.go | 3 + pkg/provision/doc.go | 4 + pkg/provision/engine.go | 1239 ++++++++++++++++++++ pkg/provision/engine_test.go | 184 +++ pkg/provision/interface.go | 149 +++ pkg/provision/mw/action.go | 163 +++ pkg/provision/mw/auth.go | 125 ++ pkg/provision/provisiner.go | 260 ++++ pkg/provision/provisioner_test.go | 152 +++ pkg/provision/resource_units.go | 13 + pkg/provision/storage.fs/shared.go | 101 ++ pkg/provision/storage.fs/storage.go | 371 ++++++ pkg/provision/storage.fs/storage_test.go | 387 ++++++ pkg/provision/storage/storage.go | 780 ++++++++++++ pkg/provision/storage/storage_test.go | 527 +++++++++ pkg/registrar.go | 10 + pkg/registrar_api_gateway.go | 4 +- pkg/registrar_gateway/registrar_gateway.go | 68 +- pkg/registrar_light/register.go | 22 +- pkg/registrar_light/registrar.go | 18 +- pkg/stubs/identity_stub.go | 26 +- pkg/stubs/provision_stub.go | 161 +++ pkg/stubs/registrar-gateway.go | 4 +- pkg/stubs/registrar_stub.go | 61 + pkg/stubs/statistics_stub.go | 137 +++ 66 files changed, 10090 insertions(+), 138 deletions(-) create mode 100644 pkg/power/ethtool.go create mode 100644 pkg/power/ethtool_test.go create mode 100644 pkg/power/power.go create mode 100644 pkg/power/uptime.go create mode 100644 pkg/primitives/gateway/gatewayfqdn.go create mode 100644 pkg/primitives/gateway/gatewayname.go create mode 100644 pkg/primitives/network-light/network.go create mode 100644 pkg/primitives/network/network.go create mode 100644 pkg/primitives/provisioner.go create mode 100644 pkg/primitives/pubip/public_ip.go create mode 100644 pkg/primitives/qsfs/qsfs.go create mode 100644 pkg/primitives/statistics.go create mode 100644 pkg/primitives/vm-light/gpu.go create mode 100644 pkg/primitives/vm-light/pause.go create mode 100644 pkg/primitives/vm-light/utils.go create mode 
100644 pkg/primitives/vm-light/vm.go create mode 100644 pkg/primitives/vm/gpu.go create mode 100644 pkg/primitives/vm/pause.go create mode 100644 pkg/primitives/vm/utils.go create mode 100644 pkg/primitives/vm/vm.go create mode 100644 pkg/primitives/volume/volume.go create mode 100644 pkg/primitives/zdb/zdb.go create mode 100644 pkg/primitives/zlogs/zlogs.go create mode 100644 pkg/primitives/zmount/zmount.go create mode 100644 pkg/provision.go create mode 100644 pkg/provision/auth.go create mode 100644 pkg/provision/common/doc.go create mode 100644 pkg/provision/doc.go create mode 100644 pkg/provision/engine.go create mode 100644 pkg/provision/engine_test.go create mode 100644 pkg/provision/interface.go create mode 100644 pkg/provision/mw/action.go create mode 100644 pkg/provision/mw/auth.go create mode 100644 pkg/provision/provisiner.go create mode 100644 pkg/provision/provisioner_test.go create mode 100644 pkg/provision/resource_units.go create mode 100644 pkg/provision/storage.fs/shared.go create mode 100644 pkg/provision/storage.fs/storage.go create mode 100644 pkg/provision/storage.fs/storage_test.go create mode 100644 pkg/provision/storage/storage.go create mode 100644 pkg/provision/storage/storage_test.go create mode 100644 pkg/registrar.go create mode 100644 pkg/stubs/provision_stub.go create mode 100644 pkg/stubs/registrar_stub.go create mode 100644 pkg/stubs/statistics_stub.go diff --git a/cmds/identityd/ssh.go b/cmds/identityd/ssh.go index 5d2cdd90..8d3e33a6 100644 --- a/cmds/identityd/ssh.go +++ b/cmds/identityd/ssh.go @@ -16,11 +16,9 @@ import ( "github.com/threefoldtech/zosbase/pkg/kernel" ) -var ( - mainNetFarms = []pkg.FarmID{ - 1, 79, 77, 76, 3997, - } -) +var mainNetFarms = []pkg.FarmID{ + 1, 79, 77, 76, 3997, +} func manageSSHKeys() error { extraUser, addUser := kernel.GetParams().GetOne("ssh-user") @@ -41,7 +39,7 @@ func manageSSHKeys() error { if env.RunningMode == environment.RunningMain { // we don't support adding the user passed as ssh-user on mainnet - addUser = false + // addUser = false } // if we are in mainnet but one of the managed farms we will use the user list from testnet @@ -73,7 +71,6 @@ func manageSSHKeys() error { for _, user := range authorizedUsers { fetchKey := func() error { res, err := http.Get(fmt.Sprintf("https://github.com/%s.keys", user)) - if err != nil { return fmt.Errorf("failed to fetch user keys: %+w", err) } diff --git a/cmds/modules/api_gateway/main.go b/cmds/modules/api_gateway/main.go index 19796620..c7fd2845 100644 --- a/cmds/modules/api_gateway/main.go +++ b/cmds/modules/api_gateway/main.go @@ -8,12 +8,11 @@ import ( "github.com/cenkalti/backoff/v3" "github.com/rs/zerolog/log" - substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" "github.com/threefoldtech/zbus" + registrar "github.com/threefoldtech/zos4/pkg/registrar_gateway" "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/environment" - substrategw "github.com/threefoldtech/zosbase/pkg/substrate_gateway" "github.com/threefoldtech/zosbase/pkg/utils" zosapi "github.com/threefoldtech/zosbase/pkg/zos_api_light" "github.com/urfave/cli/v2" @@ -57,8 +56,9 @@ func action(cli *cli.Context) error { idStub := stubs.NewIdentityManagerStub(redis) sk := ed25519.PrivateKey(idStub.PrivateKey(cli.Context)) - id, err := substrate.NewIdentityFromEd25519Key(sk) - log.Info().Str("public key", string(id.PublicKey())).Msg("node public key") + pubKey := sk.Public().(ed25519.PublicKey) + + 
log.Info().Str("public key", string(pubKey)).Msg("node public key") if err != nil { return err } @@ -69,7 +69,7 @@ func action(cli *cli.Context) error { } router := peer.NewRouter() - gw, err := substrategw.NewSubstrateGateway(manager, id) + gw, err := registrar.NewRegistrarGateway(redis, manager) if err != nil { return fmt.Errorf("failed to create api gateway: %w", err) } @@ -98,17 +98,17 @@ func action(cli *cli.Context) error { } api.SetupRoutes(router) - pair, err := id.KeyPair() - if err != nil { - return err - } + // pair, err := id.KeyPair() + // if err != nil { + // return err + // } bo := backoff.NewExponentialBackOff() bo.MaxElapsedTime = 0 backoff.Retry(func() error { _, err = peer.NewPeer( ctx, - hex.EncodeToString(pair.Seed()), + hex.EncodeToString(sk.Seed()), manager, router.Serve, peer.WithKeyType(peer.KeyTypeEd25519), diff --git a/cmds/modules/noded/main.go b/cmds/modules/noded/main.go index 7a75b001..1b916aaf 100644 --- a/cmds/modules/noded/main.go +++ b/cmds/modules/noded/main.go @@ -10,6 +10,7 @@ import ( "github.com/urfave/cli/v2" registrar "github.com/threefoldtech/zos4/pkg/registrar_light" + zos4stubs "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/app" "github.com/threefoldtech/zosbase/pkg/capacity" "github.com/threefoldtech/zosbase/pkg/environment" @@ -182,7 +183,7 @@ func action(cli *cli.Context) error { time.Sleep(time.Minute * 5) } } - registrar := stubs.NewRegistrarStub(redis) + registrar := zos4stubs.NewRegistrarStub(redis) var twin, node uint32 exp := backoff.NewExponentialBackOff() exp.MaxInterval = 2 * time.Minute diff --git a/cmds/modules/powerd/main.go b/cmds/modules/powerd/main.go index 1f1dc76f..7e6e47c7 100644 --- a/cmds/modules/powerd/main.go +++ b/cmds/modules/powerd/main.go @@ -8,10 +8,10 @@ import ( "github.com/rs/zerolog/log" substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/power" zos4stub "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/environment" "github.com/threefoldtech/zosbase/pkg/events" - "github.com/threefoldtech/zosbase/pkg/power" "github.com/threefoldtech/zosbase/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/utils" "github.com/urfave/cli/v2" @@ -60,7 +60,7 @@ func action(cli *cli.Context) error { } identity := zos4stub.NewIdentityManagerStub(cl) - register := stubs.NewRegistrarStub(cl) + register := zos4stub.NewRegistrarStub(cl) nodeID, err := register.NodeID(ctx) if err != nil { @@ -85,9 +85,9 @@ func action(cli *cli.Context) error { return err } - substrateGateway := stubs.NewSubstrateGatewayStub(cl) + registrarGateway := zos4stub.NewRegistrarGatewayStub(cl) - uptime, err := power.NewUptime(substrateGateway, id) + uptime, err := power.NewUptime(registrarGateway) if err != nil { return errors.Wrap(err, "failed to initialize uptime reported") } @@ -115,7 +115,7 @@ func action(cli *cli.Context) error { } // start power manager - power, err := power.NewPowerServer(substrateGateway, consumer, enabled, env.FarmID, nodeID, twinID, uptime) + power, err := power.NewPowerServer(registrarGateway, consumer, enabled, env.FarmID, nodeID, twinID, uptime) if err != nil { return errors.Wrap(err, "failed to initialize power manager") } diff --git a/cmds/modules/provisiond/cap.go b/cmds/modules/provisiond/cap.go index 47f03857..58a03bf9 100644 --- a/cmds/modules/provisiond/cap.go +++ b/cmds/modules/provisiond/cap.go @@ -8,9 +8,9 @@ import ( "github.com/centrifuge/go-substrate-rpc-client/v4/types" 
"github.com/rs/zerolog/log" substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" + provision "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zos4/pkg/stubs" gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" - provision "github.com/threefoldtech/zosbase/pkg/provision" - "github.com/threefoldtech/zosbase/pkg/stubs" ) type DeploymentID struct { @@ -19,14 +19,14 @@ type DeploymentID struct { } type CapacitySetter struct { - substrateGateway *stubs.SubstrateGatewayStub + registrarGateway *stubs.RegistrarGatewayStub ch chan DeploymentID storage provision.Storage } -func NewCapacitySetter(substrateGateway *stubs.SubstrateGatewayStub, storage provision.Storage) CapacitySetter { +func NewCapacitySetter(registrarGateway *stubs.RegistrarGatewayStub, storage provision.Storage) CapacitySetter { return CapacitySetter{ - substrateGateway: substrateGateway, + registrarGateway: registrarGateway, storage: storage, ch: make(chan DeploymentID, 215), } @@ -88,7 +88,7 @@ func (c *CapacitySetter) setWithClient(deployments ...gridtypes.Deployment) erro ) return backoff.RetryNotify(func() error { - return c.substrateGateway.SetContractConsumption(context.Background(), caps...) + return c.registrarGateway.SetContractConsumption(context.Background(), caps...) }, bo, func(err error, d time.Duration) { log.Error().Err(err).Dur("retry-in", d).Msg("failed to set contract consumption") }) diff --git a/cmds/modules/provisiond/events.go b/cmds/modules/provisiond/events.go index 4bfa9c3f..2890e4c6 100644 --- a/cmds/modules/provisiond/events.go +++ b/cmds/modules/provisiond/events.go @@ -6,21 +6,21 @@ import ( "github.com/pkg/errors" "github.com/rs/zerolog/log" + provision "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/events" gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" - provision "github.com/threefoldtech/zosbase/pkg/provision" - "github.com/threefoldtech/zosbase/pkg/stubs" ) type ContractEventHandler struct { - node uint32 - substrateGateway *stubs.SubstrateGatewayStub + node uint64 + registrarGateway *stubs.RegistrarGatewayStub engine provision.Engine eventsConsumer *events.RedisConsumer } -func NewContractEventHandler(node uint32, substrateGateway *stubs.SubstrateGatewayStub, engine provision.Engine, events *events.RedisConsumer) ContractEventHandler { - return ContractEventHandler{node: node, substrateGateway: substrateGateway, engine: engine, eventsConsumer: events} +func NewContractEventHandler(node uint64, substrateGateway *stubs.RegistrarGatewayStub, engine provision.Engine, events *events.RedisConsumer) ContractEventHandler { + return ContractEventHandler{node: node, registrarGateway: substrateGateway, engine: engine, eventsConsumer: events} } func (r *ContractEventHandler) current() (map[uint64]gridtypes.Deployment, error) { @@ -46,7 +46,7 @@ func (r *ContractEventHandler) sync(ctx context.Context) error { if err != nil { return errors.Wrap(err, "failed to get current active contracts") } - onchain, err := r.substrateGateway.GetNodeContracts(ctx, r.node) + onchain, err := r.registrarGateway.GetNodeContracts(ctx, uint32(r.node)) if err != nil { return errors.Wrap(err, "failed to get active node contracts") } @@ -88,7 +88,7 @@ func (r *ContractEventHandler) sync(ctx context.Context) error { Uint64("contract", id). 
Logger() - contract, err := r.substrateGateway.GetContract(ctx, id) + contract, err := r.registrarGateway.GetContract(ctx, id) if err.IsError() { logger.Error().Err(err.Err).Msg("failed to get contract from chain") continue diff --git a/cmds/modules/provisiond/main.go b/cmds/modules/provisiond/main.go index 6277a9bf..9f4f2f70 100644 --- a/cmds/modules/provisiond/main.go +++ b/cmds/modules/provisiond/main.go @@ -11,17 +11,16 @@ import ( "time" "github.com/pkg/errors" - substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" - "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zos4/pkg" + "github.com/threefoldtech/zos4/pkg/primitives" + "github.com/threefoldtech/zos4/pkg/provision/storage" + fsStorage "github.com/threefoldtech/zos4/pkg/provision/storage.fs" "github.com/threefoldtech/zosbase/pkg/app" "github.com/threefoldtech/zosbase/pkg/capacity" "github.com/threefoldtech/zosbase/pkg/environment" "github.com/threefoldtech/zosbase/pkg/events" - gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes" "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" - "github.com/threefoldtech/zosbase/pkg/primitives" - "github.com/threefoldtech/zosbase/pkg/provision/storage" - fsStorage "github.com/threefoldtech/zosbase/pkg/provision/storage.fs" "github.com/urfave/cli/v2" @@ -32,7 +31,7 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/zbus" - provision "github.com/threefoldtech/zosbase/pkg/provision" + provision "github.com/threefoldtech/zos4/pkg/provision" ) const ( @@ -249,30 +248,30 @@ func action(cli *cli.Context) error { provisioners, ) - substrateGateway := stubs.NewSubstrateGatewayStub(cl) - users, err := provision.NewSubstrateTwins(substrateGateway) + registrarGateway := zos4stubs.NewRegistrarGatewayStub(cl) + users, err := provision.NewRegistrarTwins(registrarGateway) if err != nil { return errors.Wrap(err, "failed to create substrate users database") } - admins, err := provision.NewSubstrateAdmins(substrateGateway, uint32(env.FarmID)) + admins, err := provision.NewRegistrarAdmins(registrarGateway, uint64(env.FarmID)) if err != nil { return errors.Wrap(err, "failed to create substrate admins database") } - kp, err := substrate.NewIdentityFromEd25519Key(sk) - if err != nil { - return errors.Wrap(err, "failed to get substrate keypair from secure key") + pubKey, ok := sk.Public().(ed25519.PublicKey) + if !ok { + return errors.New("failed to get public key of secure key") } - twin, subErr := substrateGateway.GetTwinByPubKey(ctx, kp.PublicKey()) - if subErr.IsError() { - return errors.Wrap(subErr.Err, "failed to get node twin id") + twin, err := registrarGateway.GetTwinByPubKey(ctx, pubKey) + if err != nil { + return errors.Wrap(err, "failed to get node twin id") } - node, subErr := substrateGateway.GetNodeByTwinID(ctx, twin) - if subErr.IsError() { - return errors.Wrap(subErr.Err, "failed to get node from twin") + node, err := registrarGateway.GetNodeByTwinID(ctx, twin) + if err != nil { + return errors.Wrap(err, "failed to get node from twin") } queues := filepath.Join(rootDir, "queues") @@ -280,7 +279,7 @@ func action(cli *cli.Context) error { return errors.Wrap(err, "failed to create storage for queues") } - setter := NewCapacitySetter(substrateGateway, store) + setter := NewCapacitySetter(registrarGateway, store) log.Info().Int("contracts", len(active)).Msg("setting used capacity by contracts") if err := setter.Set(active...); err != nil { @@ -301,7 +300,7 @@ func action(cli *cli.Context)
error { queues, provision.WithTwins(users), provision.WithAdmins(admins), - provision.WithAPIGateway(node, substrateGateway), + provision.WithAPIGateway(node, registrarGateway), // set priority to some reservation types on boot // so we always need to make sure all volumes and networks // comes first. @@ -334,7 +333,7 @@ func action(cli *cli.Context) error { server.Register( zbus.ObjectID{Name: statisticsModule, Version: "0.0.1"}, - pkg.Statistics(primitives.NewStatisticsStream(statistics)), + primitives.NewStatisticsStream(statistics), ) log.Info(). @@ -362,7 +361,7 @@ func action(cli *cli.Context) error { return errors.Wrap(err, "failed to create event consumer") } - handler := NewContractEventHandler(node, substrateGateway, engine, consumer) + handler := NewContractEventHandler(node, registrarGateway, engine, consumer) go func() { if err := handler.Run(ctx); err != nil && err != context.Canceled { diff --git a/cmds/modules/provisiond/migration.go b/cmds/modules/provisiond/migration.go index baa721b2..b38972c9 100644 --- a/cmds/modules/provisiond/migration.go +++ b/cmds/modules/provisiond/migration.go @@ -8,10 +8,10 @@ import ( "github.com/pkg/errors" "github.com/rs/zerolog/log" + "github.com/threefoldtech/zos4/pkg/provision/storage" + fsStorage "github.com/threefoldtech/zos4/pkg/provision/storage.fs" gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" - "github.com/threefoldtech/zosbase/pkg/provision/storage" - fsStorage "github.com/threefoldtech/zosbase/pkg/provision/storage.fs" ) func storageMigration(db *storage.BoltStorage, fs *fsStorage.Fs) error { diff --git a/cmds/modules/provisiond/reporter.go b/cmds/modules/provisiond/reporter.go index 700bf3a6..d72d5575 100644 --- a/cmds/modules/provisiond/reporter.go +++ b/cmds/modules/provisiond/reporter.go @@ -36,7 +36,7 @@ type Reporter struct { identity substrate.Identity queue *dque.DQue - substrateGateway *stubs.SubstrateGatewayStub + substrateGateway *zos4stubs.RegistrarGatewayStub } func reportBuilder() interface{} { @@ -80,7 +80,7 @@ func NewReporter(metricsPath string, cl zbus.Client, root string) (*Reporter, er return nil, errors.Wrap(err, "failed to setup report persisted queue") } - substrateGateway := stubs.NewSubstrateGatewayStub(cl) + substrateGateway := zos4stubs.NewRegistrarGatewayStub(cl) rrd, err := rrd.NewRRDBolt(metricsPath, 5*time.Minute, 24*time.Hour) if err != nil { diff --git a/cmds/modules/zui/header.go b/cmds/modules/zui/header.go index 77a70e62..c2ed5303 100644 --- a/cmds/modules/zui/header.go +++ b/cmds/modules/zui/header.go @@ -13,10 +13,10 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/zbus" registrar "github.com/threefoldtech/zos4/pkg/registrar_light" + zos4Stubs "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/app" "github.com/threefoldtech/zosbase/pkg/environment" "github.com/threefoldtech/zosbase/pkg/stubs" - zos4Stubs "github.com/threefoldtech/zosbase/pkg/stubs" ) func green(s string) string { @@ -39,7 +39,7 @@ func headerRenderer(ctx context.Context, c zbus.Client, h *widgets.Paragraph, r } identity := zos4Stubs.NewIdentityManagerStub(c) - registrar := stubs.NewRegistrarStub(c) + registrar := zos4Stubs.NewRegistrarStub(c) h.Text = "\n Fetching realtime node information... please wait." 
@@ -65,7 +65,7 @@ func headerRenderer(ctx context.Context, c zbus.Client, h *widgets.Paragraph, r log.Info().Err(zuiErr).Send() } - farmID, _ := identity.FarmID(ctx) + farmID := identity.FarmID(ctx) for version := range ch { var name string var nodeID string diff --git a/cmds/modules/zui/service.go b/cmds/modules/zui/service.go index 5aa4babf..b317c500 100644 --- a/cmds/modules/zui/service.go +++ b/cmds/modules/zui/service.go @@ -122,7 +122,7 @@ func serviceRender(ctx context.Context, client zbus.Client, grid *ui.Grid, rende } func getRegistrarStatus(ctx context.Context, client zbus.Client) string { - register := stubs.NewRegistrarStub(client) + register := zos4stubs.NewRegistrarStub(client) if _, err := register.NodeID(ctx); err != nil { if isInProgressError(err) { return InProgressStatus diff --git a/go.mod b/go.mod index ad02a8a7..919ac20e 100644 --- a/go.mod +++ b/go.mod @@ -5,10 +5,10 @@ go 1.21 toolchain go1.21.0 require ( - github.com/BurntSushi/toml v1.1.0 // indirect + github.com/BurntSushi/toml v1.1.0 github.com/ChainSafe/go-schnorrkel v1.1.0 // indirect github.com/blang/semver v3.5.1+incompatible - github.com/boltdb/bolt v1.3.1 // indirect + github.com/boltdb/bolt v1.3.1 github.com/cenkalti/backoff v2.2.1+incompatible github.com/cenkalti/backoff/v3 v3.2.2 github.com/centrifuge/go-substrate-rpc-client/v4 v4.0.12 @@ -26,7 +26,7 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 github.com/gtank/merlin v0.1.1 // indirect - github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d // indirect + github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d github.com/hasura/go-graphql-client v0.10.0 // indirect github.com/jbenet/go-base58 v0.0.0-20150317085156-6237cf65f3a6 github.com/joncrlsn/dque v0.0.0-20200702023911-3e80e3146ce5 @@ -34,7 +34,7 @@ require ( github.com/patrickmn/go-cache v2.1.0+incompatible // indirect github.com/pkg/errors v0.9.1 github.com/rs/zerolog v1.33.0 - github.com/shirou/gopsutil v3.21.11+incompatible // indirect + github.com/shirou/gopsutil v3.21.11+incompatible github.com/stretchr/testify v1.10.0 github.com/threefoldtech/0-fs v1.3.1-0.20240424140157-b488dfedcc56 // indirect github.com/threefoldtech/tfchain/clients/tfchain-client-go v0.0.0-20241127100051-77e684bcb1b2 @@ -50,6 +50,9 @@ require ( ) require ( + github.com/gorilla/mux v1.8.0 + github.com/hashicorp/go-retryablehttp v0.7.7 + github.com/lestrrat-go/jwx v1.1.7 github.com/threefoldtech/tfgrid-sdk-go/node-registrar v0.0.0-20250122120625-1b1ffe1a231d github.com/tyler-smith/go-bip39 v1.1.0 ) @@ -78,6 +81,7 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/deckarep/golang-set v1.8.0 // indirect github.com/decred/dcrd/crypto/blake256 v1.0.1 // indirect + github.com/decred/dcrd/dcrec/secp256k1/v3 v3.0.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/dustin/go-humanize v1.0.1 // indirect @@ -109,7 +113,6 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect - github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/holiman/uint256 v1.2.3 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect @@ -124,6 +127,11 @@ require ( github.com/klauspost/compress v1.16.7 // indirect 
github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/leodido/go-urn v1.4.0 // indirect + github.com/lestrrat-go/backoff/v2 v2.0.7 // indirect + github.com/lestrrat-go/blackmagic v1.0.0 // indirect + github.com/lestrrat-go/httpcc v1.0.0 // indirect + github.com/lestrrat-go/iter v1.0.1 // indirect + github.com/lestrrat-go/option v1.0.0 // indirect github.com/lib/pq v1.10.9 // indirect github.com/mailru/easyjson v0.7.6 // indirect github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index 2b8dabc8..3a06c5c4 100644 --- a/go.sum +++ b/go.sum @@ -132,8 +132,12 @@ github.com/deckarep/golang-set/v2 v2.1.0 h1:g47V4Or+DUdzbs8FxCCmgb6VYd+ptPAngjM6 github.com/deckarep/golang-set/v2 v2.1.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/decred/base58 v1.0.5 h1:hwcieUM3pfPnE/6p3J100zoRfGkQxBulZHo7GZfOqic= github.com/decred/base58 v1.0.5/go.mod h1:s/8lukEHFA6bUQQb/v3rjUySJ2hu+RioCzLukAVkrfw= +github.com/decred/dcrd/chaincfg/chainhash v1.0.2/go.mod h1:BpbrGgrPTr3YJYRN3Bm+D9NuaFd+zGyNeIKgrhCXK60= +github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5ilcvdfma9wOH6Y= github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= +github.com/decred/dcrd/dcrec/secp256k1/v3 v3.0.0 h1:sgNeV1VRMDzs6rzyPpxyM0jp317hnwiq58Filgag2xw= +github.com/decred/dcrd/dcrec/secp256k1/v3 v3.0.0/go.mod h1:J70FGZSbzsjecRTiTzER+3f1KZLNaXkuv+yeFTKoxM8= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= @@ -220,6 +224,7 @@ github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/goccy/go-json v0.4.8/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus v0.0.0-20180201030542-885f9cc04c9c/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= @@ -294,6 +299,8 @@ github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gordonklaus/ineffassign v0.0.0-20190601041439-ed7b1b5ee0f8/go.mod h1:cuNKsD1zp2v6XfE/orVX2QE1LC+i254ceGcVeDT3pTU= +github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= @@ -400,6 +407,22 @@ github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LE github.com/leodido/go-urn v1.2.0/go.mod 
h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/lestrrat-go/backoff/v2 v2.0.7 h1:i2SeK33aOFJlUNJZzf2IpXRBvqBBnaGXfY5Xaop/GsE= +github.com/lestrrat-go/backoff/v2 v2.0.7/go.mod h1:rHP/q/r9aT27n24JQLa7JhSQZCKBBOiM/uP402WwN8Y= +github.com/lestrrat-go/blackmagic v1.0.0 h1:XzdxDbuQTz0RZZEmdU7cnQxUtFUzgCSPq8RCz4BxIi4= +github.com/lestrrat-go/blackmagic v1.0.0/go.mod h1:TNgH//0vYSs8VXDCfkZLgIrVTTXQELZffUV0tz3MtdQ= +github.com/lestrrat-go/codegen v1.0.0/go.mod h1:JhJw6OQAuPEfVKUCLItpaVLumDGWQznd1VaXrBk9TdM= +github.com/lestrrat-go/httpcc v1.0.0 h1:FszVC6cKfDvBKcJv646+lkh4GydQg2Z29scgUfkOpYc= +github.com/lestrrat-go/httpcc v1.0.0/go.mod h1:tGS/u00Vh5N6FHNkExqGGNId8e0Big+++0Gf8MBnAvE= +github.com/lestrrat-go/iter v1.0.1 h1:q8faalr2dY6o8bV45uwrxq12bRa1ezKrB6oM9FUgN4A= +github.com/lestrrat-go/iter v1.0.1/go.mod h1:zIdgO1mRKhn8l9vrZJZz9TUMMFbQbLeTsbqPDrJ/OJc= +github.com/lestrrat-go/jwx v1.1.7 h1:+PNt2U7FfrK4xn+ZCG+9jPRq5eqyG30gwpVwcekrCjA= +github.com/lestrrat-go/jwx v1.1.7/go.mod h1:Tg2uP7bpxEHUDtuWjap/PxroJ4okxGzkQznXiG+a5Dc= +github.com/lestrrat-go/option v0.0.0-20210103042652-6f1ecfceda35/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I= +github.com/lestrrat-go/option v1.0.0 h1:WqAWL8kh8VcSoD6xjSH34/1m8yxluXQbDeKNfvFeEO4= +github.com/lestrrat-go/option v1.0.0/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I= +github.com/lestrrat-go/pdebug/v3 v3.0.1 h1:3G5sX/aw/TbMTtVc9U7IHBWRZtMvwvBziF1e4HoQtv8= +github.com/lestrrat-go/pdebug/v3 v3.0.1/go.mod h1:za+m+Ve24yCxTEhR59N7UlnJomWwCiIqbJRmKeiADU4= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ= @@ -691,6 +714,7 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201217014255-9d1352758620/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= @@ -710,6 +734,7 @@ golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKG golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= @@ 
-757,6 +782,7 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -856,7 +882,9 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200102200121-6de373a2766c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200918232735-d647fc253266/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210114065538-d78b04bdf963/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= diff --git a/pkg/identity.go b/pkg/identity.go index 2d727b27..85af774c 100644 --- a/pkg/identity.go +++ b/pkg/identity.go @@ -1,7 +1,9 @@ package pkg +import "github.com/threefoldtech/zosbase/pkg" + //go:generate mkdir -p stubs -//go:generate zbusc -module identityd -version 0.0.1 -name manager -package stubs github.com/threefoldtech/zosbase/pkg+IdentityManager stubs/identity_stub.go +//go:generate zbusc -module identityd -version 0.0.1 -name manager -package stubs github.com/threefoldtech/zos4/pkg+IdentityManager stubs/identity_stub.go // Identifier is the interface that defines // how an object can be used as an identity @@ -37,7 +39,7 @@ type IdentityManager interface { // FarmID return the farm id this node is part of. this is usually a configuration // that the node is booted with. An error is returned if the farmer id is not configured - FarmID() FarmID + FarmID() pkg.FarmID // Farm returns name of the farm. 
Or error Farm() (string, error) @@ -67,6 +69,3 @@ type IdentityManager interface { // PrivateKey sends the keypair PrivateKey() []byte } - -// FarmID is the identification of a farm -type FarmID uint32 diff --git a/pkg/identity/identityd.go b/pkg/identity/identityd.go index 4917fbce..3869a588 100644 --- a/pkg/identity/identityd.go +++ b/pkg/identity/identityd.go @@ -8,12 +8,13 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/node-registrar/pkg/db" + "github.com/threefoldtech/zos4/pkg/identity/store" registrargw "github.com/threefoldtech/zos4/pkg/registrar_gateway" "github.com/threefoldtech/zosbase/pkg/crypto" - "github.com/threefoldtech/zosbase/pkg/identity/store" "github.com/pkg/errors" - "github.com/threefoldtech/zos4/pkg" + zos4pkg "github.com/threefoldtech/zos4/pkg" + "github.com/threefoldtech/zosbase/pkg" "github.com/threefoldtech/zosbase/pkg/environment" ) @@ -32,7 +33,7 @@ type identityManager struct { // mode. Right now only the key store uses this flag. In case of debug migrated keys // to tpm are not deleted from disks. This allow switching back and forth between tpm // and non-tpm key stores. -func NewManager(root string, debug bool) (pkg.IdentityManager, error) { +func NewManager(root string, debug bool) (zos4pkg.IdentityManager, error) { st, err := NewStore(root, !debug) if err != nil { return nil, errors.Wrap(err, "failed to create key store") @@ -76,8 +77,8 @@ func (d *identityManager) StoreKind() string { } // NodeID returns the node identity -func (d *identityManager) NodeID() pkg.StrIdentifier { - return pkg.StrIdentifier(d.key.Identity()) +func (d *identityManager) NodeID() zos4pkg.StrIdentifier { + return zos4pkg.StrIdentifier(d.key.Identity()) } // NodeID returns the node identity @@ -122,7 +123,7 @@ func (d *identityManager) Farm() (name string, err error) { // FarmID returns the farm ID of the node or an error if no farm ID is configured func (d *identityManager) FarmID() pkg.FarmID { - return pkg.FarmID(d.env.FarmID) + return d.env.FarmID } // FarmSecret returns farm secret from kernel params diff --git a/pkg/power/ethtool.go b/pkg/power/ethtool.go new file mode 100644 index 00000000..6480ed8c --- /dev/null +++ b/pkg/power/ethtool.go @@ -0,0 +1,69 @@ +package power + +import ( + "bufio" + "bytes" + "context" + "fmt" + "os/exec" + "strings" + + "github.com/pkg/errors" +) + +type Flag string + +const ( + SupportsWakeOn Flag = "Supports Wake-on" + WakeOn Flag = "Wake-on" +) + +type WolMode string + +const ( + MagicPacket WolMode = "g" +) + +var ( + ErrFlagNotFound = fmt.Errorf("flag not found") +) + +func ethtool(ctx context.Context, arg ...string) ([]byte, error) { + return exec.CommandContext(ctx, "ethtool", arg...).CombinedOutput() +} + +func ValueOfFlag(ctx context.Context, nic string, flag Flag) (string, error) { + output, err := ethtool(ctx, nic) + if err != nil { + return "", err + } + + return valueOfFlag(output, flag) +} + +func valueOfFlag(output []byte, flag Flag) (string, error) { + buf := bytes.NewBuffer(output) + scanner := bufio.NewScanner(buf) + for scanner.Scan() { + if err := scanner.Err(); err != nil { + return "", err + } + + line := strings.TrimSpace(scanner.Text()) + parts := strings.Split(line, ":") + if parts[0] != string(flag) { + continue + } + if len(parts) != 2 { + return "", fmt.Errorf("invalid ethtool output format (%s)", line) + } + return strings.TrimSpace(parts[1]), nil + } + + return "", ErrFlagNotFound +} + +func SetWol(ctx context.Context, nic string, mode WolMode) error { + _, err := ethtool(ctx, "-s", 
nic, "wol", string(mode)) + return errors.Wrap(err, "failed to set nic wol") +} diff --git a/pkg/power/ethtool_test.go b/pkg/power/ethtool_test.go new file mode 100644 index 00000000..b4c41a70 --- /dev/null +++ b/pkg/power/ethtool_test.go @@ -0,0 +1,75 @@ +package power + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseFlags(t *testing.T) { + const input = `Settings for enp6s0f0: + Supported ports: [ TP ] + Supported link modes: 10baseT/Half 10baseT/Full + 100baseT/Half 100baseT/Full + 1000baseT/Full + Supported pause frame use: Symmetric + Supports auto-negotiation: Yes + Supported FEC modes: Not reported + Advertised link modes: 10baseT/Half 10baseT/Full + 100baseT/Half 100baseT/Full + 1000baseT/Full + Advertised pause frame use: Symmetric + Advertised auto-negotiation: Yes + Advertised FEC modes: Not reported + Speed: 1000Mb/s + Duplex: Full + Port: Twisted Pair + PHYAD: 1 + Transceiver: internal + Auto-negotiation: on + MDI-X: off (auto) + Supports Wake-on: pumbg + Wake-on: g + Current message level: 0x00000007 (7) + drv probe link + Link detected: yes + ` + + value, err := valueOfFlag([]byte(input), SupportsWakeOn) + require.NoError(t, err) + + require.Equal(t, "pumbg", value) +} + +func TestParseFlagNotSet(t *testing.T) { + const input = `Settings for enp6s0f0: + Supported ports: [ TP ] + Supported link modes: 10baseT/Half 10baseT/Full + 100baseT/Half 100baseT/Full + 1000baseT/Full + Supported pause frame use: Symmetric + Supports auto-negotiation: Yes + Supported FEC modes: Not reported + Advertised link modes: 10baseT/Half 10baseT/Full + 100baseT/Half 100baseT/Full + 1000baseT/Full + Advertised pause frame use: Symmetric + Advertised auto-negotiation: Yes + Advertised FEC modes: Not reported + Speed: 1000Mb/s + Duplex: Full + Port: Twisted Pair + PHYAD: 1 + Transceiver: internal + Auto-negotiation: on + MDI-X: off (auto) + Wake-on: g + Current message level: 0x00000007 (7) + drv probe link + Link detected: yes + ` + + _, err := valueOfFlag([]byte(input), SupportsWakeOn) + require.ErrorIs(t, err, ErrFlagNotFound) + +} diff --git a/pkg/power/power.go b/pkg/power/power.go new file mode 100644 index 00000000..d1778c2f --- /dev/null +++ b/pkg/power/power.go @@ -0,0 +1,302 @@ +package power + +import ( + "context" + "fmt" + "os/exec" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/node-registrar/pkg/db" + "github.com/threefoldtech/zos4/pkg/stubs" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/events" + "github.com/threefoldtech/zosbase/pkg/network/bridge" + "github.com/threefoldtech/zosbase/pkg/zinit" +) + +type PowerServer struct { + consumer *events.RedisConsumer + registrarGateway *stubs.RegistrarGatewayStub + + // enabled means the node can power off! 
+ enabled bool + farm pkg.FarmID + node uint32 + twin uint32 + ut *Uptime +} + +func NewPowerServer( + registrarGateway *stubs.RegistrarGatewayStub, + consumer *events.RedisConsumer, + enabled bool, + farm pkg.FarmID, + node uint32, + twin uint32, + ut *Uptime, +) (*PowerServer, error) { + return &PowerServer{ + registrarGateway: registrarGateway, + consumer: consumer, + enabled: enabled, + farm: farm, + node: node, + twin: twin, + ut: ut, + }, nil +} + +const ( + DefaultWolBridge = "zos" + PowerServerPort = 8039 +) + +func EnsureWakeOnLan(ctx context.Context) (bool, error) { + inf, err := bridge.Get(DefaultWolBridge) + if err != nil { + return false, errors.Wrap(err, "failed to get zos bridge") + } + + nics, err := bridge.ListNics(inf, true) + if err != nil { + return false, errors.Wrap(err, "failed to list attached nics to zos bridge") + } + + filtered := nics[:0] + for _, nic := range nics { + if nic.Type() == "device" { + filtered = append(filtered, nic) + } + } + + if len(filtered) != 1 { + return false, fmt.Errorf("zos bridge has multiple interfaces") + } + + nic := filtered[0].Attrs().Name + log.Info().Str("nic", nic).Msg("enabling wol on interface") + support, err := ValueOfFlag(ctx, nic, SupportsWakeOn) + + if errors.Is(err, ErrFlagNotFound) { + // no support for + return false, nil + } else if err != nil { + return false, errors.Wrap(err, "failed to detect support for wake on lan") + } + + if !strings.Contains(support, string(MagicPacket)) { + // no magic packet support either + return false, nil + } + + return true, SetWol(ctx, nic, MagicPacket) +} + +func (p *PowerServer) syncSelf() error { + power, err := p.registrarGateway.GetPowerTarget(context.Background()) + if err != nil { + return err + } + + // power target is the state the node has to be in + // while the node state is the actual state set by the node. + + // if target is up, and the node state is up, we do nothing + // if target is up, but th state is down, we set the state to up and return + // if target is down, we make sure state is down, then shutdown + + if power.Target.IsUp { + if err := p.setNodePowerState(true); err != nil { + return errors.Wrap(err, "failed to set state to up") + } + + return nil + } + + // now the target must be down. + // we need to shutdown + + if err := p.setNodePowerState(false); err != nil { + return errors.Wrap(err, "failed to set state to down") + } + + // otherwise node need to get back to sleep. + if err := p.shutdown(); err != nil { + return errors.Wrap(err, "failed to issue shutdown") + } + + return nil +} + +func (p *PowerServer) powerUp(node *db.Node, reason string) error { + log.Info().Uint64("node", node.NodeID).Str("reason", reason).Msg("powering on node") + + mac := "" + for _, inf := range node.Interfaces { + if inf.Name == "zos" { + mac = inf.Mac + break + } + } + if mac == "" { + return fmt.Errorf("can't find mac address of node '%d'", node.NodeID) + } + + for i := 0; i < 10; i++ { + if err := exec.Command("ether-wake", "-i", "zos", mac).Run(); err != nil { + log.Error().Err(err).Msg("failed to send WOL") + } + <-time.After(500 * time.Millisecond) + } + + return nil +} + +func (p *PowerServer) shutdown() error { + if !p.enabled { + log.Info().Msg("ignoring shutdown because power-management is not enabled") + return nil + } + + log.Info().Msg("shutting down node because of chain") + if err := p.ut.SendNow(); err != nil { + log.Error().Err(err).Msg("failed to send uptime before shutting down") + } + + // is down! 
+ init := zinit.Default() + err := init.Shutdown() + + if errors.Is(err, zinit.ErrNotSupported) { + log.Info().Msg("node does not support shutdown. rebooting to update") + return init.Reboot() + } + + return err +} + +func (p *PowerServer) event(event *pkg.PowerTargetChangeEvent) error { + if event.FarmID != p.farm { + return nil + } + + log.Debug(). + Uint32("farm", uint32(p.farm)). + Uint32("node", p.node). + Msg("received power event for farm") + + node, err := p.registrarGateway.GetNode(context.Background(), uint64(event.NodeID)) + if err != nil { + return err + } + + if event.NodeID == p.node && event.Target.IsDown { + // we need to shutdown! + if err := p.setNodePowerState(false); err != nil { + return errors.Wrap(err, "failed to set node power state to down") + } + + return p.shutdown() + } else if event.Target.IsDown { + return nil + } + + if event.Target.IsUp { + log.Info().Uint32("target", event.NodeID).Msg("received an event to power up") + return p.powerUp(&node, "target is up") + } + + return nil +} + +// setNodePowerState sets the node power state as provided or to up if power mgmt is +// not enabled on this node. +// this function makes sure to compare the state with on chain state to not do +// un-necessary transactions. +func (p *PowerServer) setNodePowerState(up bool) error { + /* + if power is not enabled, the node state should always be up + otherwise update the state to the correct value + + | enabled | up | result| + | 0 | 0 | 1 | + | 0 | 1 | 1 | + | 1 | 0 | 0 | + | 1 | 1 | 1 | + + this simplifies as below: + */ + + up = !p.enabled || up + power, err := p.registrarGateway.GetPowerTarget(context.Background()) + if err != nil { + return errors.Wrap(err, "failed to check power state") + } + + // only update the chain if it's different from actual value. + if power.State.IsUp == up { + return nil + } + + log.Info().Bool("state", up).Msg("setting node power state") + // this to make sure node state is fixed also for nodes + _, err = p.registrarGateway.SetNodePowerState(context.Background(), up) + return err +} + +func (p *PowerServer) recv(ctx context.Context) error { + log.Info().Msg("listening for power events") + + if err := p.syncSelf(); err != nil { + return errors.Wrap(err, "failed to synchronize power status") + } + + subCtx, cancel := context.WithCancel(ctx) + defer cancel() + + stream, err := p.consumer.PowerTargetChange(subCtx) + if err != nil { + return errors.Wrap(err, "failed to connect to zbus events") + } + + for event := range stream { + if err := p.event(&event); err != nil { + return errors.Wrap(err, "failed to process power event") + } + } + + // if we reach here it means stream was ended. this can only happen + // if and only if the steam was over and that can only be via a ctx + // cancel. + return nil +} + +// start processing time events. 
+func (p *PowerServer) events(ctx context.Context) error { + // first thing we need to make sure we are not suppose to be powered + // off, so we need to sync with grid + // make sure at least one uptime was already sent + _ = p.ut.Mark.Done(ctx) + + // if the stream loop fails for any reason retry + // unless context was cancelled + for { + err := p.recv(ctx) + if err != nil { + log.Error().Err(err).Msg("failed to process power events") + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(5 * time.Second): + } + } +} + +func (p *PowerServer) Start(ctx context.Context) error { + return p.events(ctx) +} diff --git a/pkg/power/uptime.go b/pkg/power/uptime.go new file mode 100644 index 00000000..e94e80b0 --- /dev/null +++ b/pkg/power/uptime.go @@ -0,0 +1,129 @@ +package power + +import ( + "context" + "fmt" + "runtime/debug" + "sync" + "time" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/shirou/gopsutil/host" + "github.com/threefoldtech/zos4/pkg/stubs" + "github.com/threefoldtech/zosbase/pkg/app" + "github.com/threefoldtech/zosbase/pkg/utils" +) + +const ( + reportUptimeEvery = 40 * time.Minute +) + +type Uptime struct { + // Mark is set to done after the first uptime is sent + Mark utils.Mark + + registrarGateway *stubs.RegistrarGatewayStub + m sync.Mutex +} + +func NewUptime(registrarGateway *stubs.RegistrarGatewayStub) (*Uptime, error) { + return &Uptime{ + // id: id, + registrarGateway: registrarGateway, + Mark: utils.NewMark(), + }, nil +} + +func (u *Uptime) SendNow() error { + if !isNodeHealthy() { + log.Error().Msg("node is not healthy skipping uptime reports") + return nil + } + + // the mutex is to avoid race when SendNow is called + // while the times reporting is working + u.m.Lock() + defer u.m.Unlock() + + // this can take sometime in case of connection problems + // hence we first establish a connection THEN get the node + // uptime. + // to make sure the uptime is correct at the time of reporting + uptime, err := host.Uptime() + if err != nil { + return errors.Wrap(err, "failed to get uptime") + } + + return u.registrarGateway.UpdateNodeUptimeV2(context.Background(), uptime, uint64(time.Now().Unix())) +} + +func (u *Uptime) uptime(ctx context.Context) error { + for { + log.Debug().Msg("updating node uptime") + err := u.SendNow() + if err != nil { + return errors.Wrap(err, "failed to report uptime") + } + + u.Mark.Signal() + + log.Info().Msg("node uptime sent successfully") + + select { + case <-ctx.Done(): + return nil + case <-time.After(reportUptimeEvery): + continue + } + } +} + +// start uptime reporting. returns a channel that is closed immediately after +// the first uptime is reported. +func (u *Uptime) Start(ctx context.Context) { + // uptime update + defer log.Info().Msg("uptime reporting exited permanently") + safeUptime := func(ctx context.Context) (err error) { + defer func() { + if p := recover(); p != nil { + err = fmt.Errorf("uptime reporting has panicked: %+v\n%s", p, string(debug.Stack())) + } + }() + + err = u.uptime(ctx) + return err + } + + for { + err := safeUptime(ctx) + if errors.Is(err, context.Canceled) { + log.Info().Msg("stop uptime reporting. 
context cancelled") + return + } else if err != nil { + log.Error().Err(err).Msg("sending uptime failed") + } else { + // context was cancelled + return + } + // even there is no error we try again until ctx is cancelled + <-time.After(10 * time.Second) + } +} + +func isNodeHealthy() bool { + healthy := true + if app.CheckFlag(app.ReadonlyCache) { + log.Error().Msg("node cache is read only") + healthy = false + } + if app.CheckFlag(app.LimitedCache) { + log.Error().Msg("node is running on limited cache") + healthy = false + } + if app.CheckFlag(app.NotReachable) { + log.Error().Msg("node can not reach grid services") + // healthy = false // disabled for now + } + return healthy +} diff --git a/pkg/primitives/gateway/gatewayfqdn.go b/pkg/primitives/gateway/gatewayfqdn.go new file mode 100644 index 00000000..80d6801b --- /dev/null +++ b/pkg/primitives/gateway/gatewayfqdn.go @@ -0,0 +1,47 @@ +package gateway + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var _ provision.Manager = (*FQDNManager)(nil) + +type FQDNManager struct { + zbus zbus.Client +} + +func NewFQDNManager(zbus zbus.Client) *FQDNManager { + return &FQDNManager{zbus} +} + +func (p *FQDNManager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + result := zos.GatewayFQDNResult{} + var proxy zos.GatewayFQDNProxy + if err := json.Unmarshal(wl.Data, &proxy); err != nil { + return nil, fmt.Errorf("failed to unmarshal gateway proxy from reservation: %w", err) + } + + gateway := stubs.NewGatewayStub(p.zbus) + err := gateway.SetFQDNProxy(ctx, wl.ID.String(), proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to setup fqdn proxy") + } + return result, nil +} + +func (p *FQDNManager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + gateway := stubs.NewGatewayStub(p.zbus) + if err := gateway.DeleteNamedProxy(ctx, wl.ID.String()); err != nil { + return errors.Wrap(err, "failed to delete fqdn proxy") + } + return nil +} diff --git a/pkg/primitives/gateway/gatewayname.go b/pkg/primitives/gateway/gatewayname.go new file mode 100644 index 00000000..547f3000 --- /dev/null +++ b/pkg/primitives/gateway/gatewayname.go @@ -0,0 +1,50 @@ +package gateway + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var _ provision.Manager = (*NameManager)(nil) + +type NameManager struct { + zbus zbus.Client +} + +func NewNameManager(zbus zbus.Client) *NameManager { + return &NameManager{zbus} +} + +func (p *NameManager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + result := zos.GatewayProxyResult{} + var proxy zos.GatewayNameProxy + if err := json.Unmarshal(wl.Data, &proxy); err != nil { + return nil, fmt.Errorf("failed to unmarshal gateway proxy from reservation: %w", err) + } + + gateway := stubs.NewGatewayStub(p.zbus) + fqdn, err := gateway.SetNamedProxy(ctx, wl.ID.String(), proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to setup name proxy") + } + result.FQDN = fqdn + 
log.Debug().Str("domain", fqdn).Msg("domain reserved") + return result, nil +} + +func (p *NameManager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + gateway := stubs.NewGatewayStub(p.zbus) + if err := gateway.DeleteNamedProxy(ctx, wl.ID.String()); err != nil { + return errors.Wrap(err, "failed to delete name proxy") + } + return nil +} diff --git a/pkg/primitives/network-light/network.go b/pkg/primitives/network-light/network.go new file mode 100644 index 00000000..7714393b --- /dev/null +++ b/pkg/primitives/network-light/network.go @@ -0,0 +1,67 @@ +package netlight + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + provision "github.com/threefoldtech/zos4/pkg/provision" + gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +// networkProvision is entry point to provision a network +func (p *Manager) networkProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + twin, _ := provision.GetDeploymentID(ctx) + + var network zos.NetworkLight + if err := json.Unmarshal(wl.Data, &network); err != nil { + return fmt.Errorf("failed to unmarshal network from reservation: %w", err) + } + + mgr := stubs.NewNetworkerLightStub(p.zbus) + log.Debug().Str("network", fmt.Sprintf("%+v", network)).Msg("provision network") + + err := mgr.Create(ctx, string(zos.NetworkID(twin, wl.Name)), network.Subnet.IPNet, network.Mycelium.Key) + if err != nil { + return errors.Wrapf(err, "failed to create network resource for network %s", wl.ID) + } + + return nil +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return nil, p.networkProvisionImpl(ctx, wl) +} + +func (p *Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return nil, p.networkProvisionImpl(ctx, wl) +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + twin, _ := provision.GetDeploymentID(ctx) + mgr := stubs.NewNetworkerLightStub(p.zbus) + + if err := mgr.Delete(ctx, string(zos.NetworkID(twin, wl.Name))); err != nil { + return fmt.Errorf("failed to delete network resource: %w", err) + } + + return nil +} diff --git a/pkg/primitives/network/network.go b/pkg/primitives/network/network.go new file mode 100644 index 00000000..b1fd1890 --- /dev/null +++ b/pkg/primitives/network/network.go @@ -0,0 +1,71 @@ +package network + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +// networkProvision is entry point to provision a network +func (p *Manager) networkProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) 
error { + twin, _ := provision.GetDeploymentID(ctx) + + var network zos.Network + if err := json.Unmarshal(wl.Data, &network); err != nil { + return fmt.Errorf("failed to unmarshal network from reservation: %w", err) + } + + mgr := stubs.NewNetworkerStub(p.zbus) + log.Debug().Str("network", fmt.Sprintf("%+v", network)).Msg("provision network") + + _, err := mgr.CreateNR(ctx, wl.ID, pkg.Network{ + Network: network, + NetID: zos.NetworkID(twin, wl.Name), + }) + if err != nil { + return errors.Wrapf(err, "failed to create network resource for network %s", wl.ID) + } + + return nil +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return nil, p.networkProvisionImpl(ctx, wl) +} + +func (p *Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return nil, p.networkProvisionImpl(ctx, wl) +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + mgr := stubs.NewNetworkerStub(p.zbus) + + if err := mgr.DeleteNR(ctx, wl.ID); err != nil { + return fmt.Errorf("failed to delete network resource: %w", err) + } + + return nil +} diff --git a/pkg/primitives/provisioner.go b/pkg/primitives/provisioner.go new file mode 100644 index 00000000..ae94dd11 --- /dev/null +++ b/pkg/primitives/provisioner.go @@ -0,0 +1,40 @@ +package primitives + +import ( + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/primitives/gateway" + "github.com/threefoldtech/zos4/pkg/primitives/network" + netlight "github.com/threefoldtech/zos4/pkg/primitives/network-light" + "github.com/threefoldtech/zos4/pkg/primitives/pubip" + "github.com/threefoldtech/zos4/pkg/primitives/qsfs" + "github.com/threefoldtech/zos4/pkg/primitives/vm" + vmlight "github.com/threefoldtech/zos4/pkg/primitives/vm-light" + "github.com/threefoldtech/zos4/pkg/primitives/volume" + "github.com/threefoldtech/zos4/pkg/primitives/zdb" + "github.com/threefoldtech/zos4/pkg/primitives/zlogs" + "github.com/threefoldtech/zos4/pkg/primitives/zmount" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +// NewPrimitivesProvisioner creates a new 0-OS provisioner +func NewPrimitivesProvisioner(zbus zbus.Client) provision.Provisioner { + managers := map[gridtypes.WorkloadType]provision.Manager{ + zos.ZMountType: zmount.NewManager(zbus), + zos.ZLogsType: zlogs.NewManager(zbus), + zos.QuantumSafeFSType: qsfs.NewManager(zbus), + zos.ZDBType: zdb.NewManager(zbus), + zos.NetworkType: network.NewManager(zbus), + zos.PublicIPType: pubip.NewManager(zbus), + zos.PublicIPv4Type: pubip.NewManager(zbus), // backward compatibility + zos.ZMachineType: vm.NewManager(zbus), + zos.NetworkLightType: netlight.NewManager(zbus), + zos.ZMachineLightType: vmlight.NewManager(zbus), + zos.VolumeType: volume.NewManager(zbus), + zos.GatewayNameProxyType: gateway.NewNameManager(zbus), + zos.GatewayFQDNProxyType: gateway.NewFQDNManager(zbus), + } + + return provision.NewMapProvisioner(managers) +} diff --git a/pkg/primitives/pubip/public_ip.go b/pkg/primitives/pubip/public_ip.go new file mode 100644 index 00000000..4345f2ee --- /dev/null +++ b/pkg/primitives/pubip/public_ip.go @@ -0,0 +1,236 @@ +package pubip + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "net" + "strings" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + 
"github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/network/ifaceutil" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var _ provision.Manager = (*Manager)(nil) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return p.publicIPProvisionImpl(ctx, wl) +} + +func (p *Manager) getAssignedPublicIP(ctx context.Context, wl *gridtypes.WorkloadWithID) (ip gridtypes.IPNet, gw net.IP, err error) { + // Okay, this implementation is tricky but will be simplified once we store the reserved IP + // on the contract. + + // Okay, so first (and easiest path) is that the Ip was already + // assigned, hence we can simply use it again. this is usually + // the case if the node is rerunning the same workload deployment for + // some reason. + if !wl.Result.IsNil() && wl.Result.State == gridtypes.StateOk { + var result zos.PublicIPResult + if err := wl.Result.Unmarshal(&result); err != nil { + return ip, gw, errors.Wrap(err, "failed to load public ip result") + } + + return result.IP, result.Gateway, nil + } + // otherwise we do the following: + + // - We need to get the contract and the farm object this node belongs to + deployment, err := provision.GetDeployment(ctx) + if err != nil { + return ip, gw, errors.Wrap(err, "failed to get deployment") + } + contract := provision.GetContract(ctx) + + // - now we find out ALL ips belonging to this contract + reserved := contract.PublicIPs + + // make sure we have enough reserved IPs + // we process both ipv4 type and ip type to be backward compatible + ipWorkloads := deployment.ByType(zos.PublicIPv4Type, zos.PublicIPType) + reservedCount := 0 + for _, wl := range ipWorkloads { + config, err := p.getPublicIPData(ctx, wl) + if err != nil { + return gridtypes.IPNet{}, nil, err + } + if config.V4 { + reservedCount += 1 + } + } + + if reservedCount > len(reserved) { + return ip, gw, fmt.Errorf("required %d ips while contract has %d ip reserved", len(ipWorkloads), len(reserved)) + } + + usedIPs := make(map[string]struct{}) + + for _, ipWl := range ipWorkloads { + if wl.Name == ipWl.Name { + // we don't need this. + continue + } + + if ipWl.Result.IsNil() || ipWl.Result.State != gridtypes.StateOk { + continue + } + + used, err := GetPubIPConfig(ipWl) + if err != nil { + return ip, gw, err + } + + usedIPs[used.IP.String()] = struct{}{} + } + + // otherwise we go over the list of IPs and take the first free one + for _, reservedIP := range reserved { + if _, ok := usedIPs[reservedIP.IP]; !ok { + // free ip. 
we can just take it + ip, err = gridtypes.ParseIPNet(reservedIP.IP) + if err != nil { + return ip, gw, fmt.Errorf("found a malformed ip address in contract object '%s'", ip.IP) + } + gw = net.ParseIP(reservedIP.Gateway) + if gw == nil { + return ip, gw, fmt.Errorf("found a malformed gateway address in farm object '%s'", reservedIP.Gateway) + } + + return ip, gw, nil + } + } + + return ip, gw, fmt.Errorf("could not allocate public IP address to workload") +} + +func (p *Manager) getPublicIPData(ctx context.Context, wl *gridtypes.WorkloadWithID) (result zos.PublicIP, err error) { + switch wl.Type { + case zos.PublicIPv4Type: + // backword compatibility with older ipv4 type + result.V4 = true + case zos.PublicIPType: + err = json.Unmarshal(wl.Data, &result) + default: + return result, fmt.Errorf("invalid workload type expecting (%s or %s) got '%s'", zos.PublicIPv4Type, zos.PublicIPType, wl.Type) + } + + return +} + +func (p *Manager) publicIPProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (result zos.PublicIPResult, err error) { + config, err := p.getPublicIPData(ctx, wl) + if err != nil { + return zos.PublicIPResult{}, err + } + + tapName := wl.ID.Unique("pub") + network := stubs.NewNetworkerStub(p.zbus) + fName := filterName(tapName) + + if network.PubIPFilterExists(ctx, fName) { + return result, provision.ErrNoActionNeeded + } + + var ipv6 gridtypes.IPNet + var ipv4 gridtypes.IPNet + var gw4 net.IP + + mac := ifaceutil.HardwareAddrFromInputBytes([]byte(tapName)) + if config.V6 { + pubIP6Base, err := network.GetPublicIPv6Subnet(ctx) + if err != nil { + return result, errors.Wrap(err, "could not look up ipv6 prefix") + } + + ipv6, err = predictedSlaac(pubIP6Base, mac.String()) + if err != nil { + return zos.PublicIPResult{}, errors.Wrap(err, "could not compute ipv6 valu") + } + } + + if config.V4 { + ipv4, gw4, err = p.getAssignedPublicIP(ctx, wl) + if err != nil { + return zos.PublicIPResult{}, err + } + } + + result.IP = ipv4 + result.IPv6 = ipv6 + result.Gateway = gw4 + + ifName := fmt.Sprintf("p-%s", tapName) // TODO: clean this up, needs to come form networkd + err = network.SetupPubIPFilter(ctx, fName, ifName, ipv4.IP, ipv6.IP, mac.String()) + + return +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + // Disconnect the public interface from the network if one exists + network := stubs.NewNetworkerStub(p.zbus) + tapName := wl.ID.Unique("pub") + fName := filterName(tapName) + if err := network.RemovePubIPFilter(ctx, fName); err != nil { + log.Error().Err(err).Msg("could not remove filter rules") + } + return network.DisconnectPubTap(ctx, tapName) +} + +func filterName(reservationID string) string { + return fmt.Sprintf("r-%s", reservationID) +} + +// modified version of: https://github.com/MalteJ/docker/blob/f09b7897d2a54f35a0b26f7cbe750b3c9383a553/daemon/networkdriver/bridge/driver.go#L585 +func predictedSlaac(base net.IPNet, mac string) (gridtypes.IPNet, error) { + // TODO: get pub ipv6 prefix + hx := strings.Replace(mac, ":", "", -1) + hw, err := hex.DecodeString(hx) + if err != nil { + return gridtypes.IPNet{}, errors.New("Could not parse MAC address " + mac) + } + + hw[0] ^= 0x2 + + base.IP[8] = hw[0] + base.IP[9] = hw[1] + base.IP[10] = hw[2] + base.IP[11] = 0xFF + base.IP[12] = 0xFE + base.IP[13] = hw[3] + base.IP[14] = hw[4] + base.IP[15] = hw[5] + + return gridtypes.IPNet{IPNet: base}, nil +} + +// GetPubIPConfig get the public ip, and the gateway from the workload +func GetPubIPConfig(wl *gridtypes.WorkloadWithID) 
(result zos.PublicIPResult, err error) { + if wl.Type != zos.PublicIPv4Type && wl.Type != zos.PublicIPType { + return result, fmt.Errorf("workload for public IP is of wrong type") + } + + if wl.Result.State != gridtypes.StateOk { + return result, fmt.Errorf("public ip workload is not okay") + } + + if err := wl.Result.Unmarshal(&result); err != nil { + return result, errors.Wrap(err, "failed to load ip result") + } + + return result, nil +} diff --git a/pkg/primitives/qsfs/qsfs.go b/pkg/primitives/qsfs/qsfs.go new file mode 100644 index 00000000..32a6bdd5 --- /dev/null +++ b/pkg/primitives/qsfs/qsfs.go @@ -0,0 +1,68 @@ +package qsfs + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + var result zos.QuatumSafeFSResult + var proxy zos.QuantumSafeFS + if err := json.Unmarshal(wl.Data, &proxy); err != nil { + return nil, fmt.Errorf("failed to unmarshal qsfs data from reservation: %w", err) + } + qsfs := stubs.NewQSFSDStub(p.zbus) + info, err := qsfs.Mount(ctx, wl.ID.String(), proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to create qsfs mount") + } + result.Path = info.Path + result.MetricsEndpoint = info.MetricsEndpoint + return result, nil +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + qsfs := stubs.NewQSFSDStub(p.zbus) + err := qsfs.SignalDelete(ctx, wl.ID.String()) + if err != nil { + return errors.Wrap(err, "failed to delete qsfs") + } + return nil +} + +func (p *Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + var result zos.QuatumSafeFSResult + var proxy zos.QuantumSafeFS + if err := json.Unmarshal(wl.Data, &proxy); err != nil { + return nil, fmt.Errorf("failed to unmarshal qsfs data from reservation: %w", err) + } + qsfs := stubs.NewQSFSDStub(p.zbus) + info, err := qsfs.UpdateMount(ctx, wl.ID.String(), proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to update qsfs mount") + } + result.Path = info.Path + result.MetricsEndpoint = info.MetricsEndpoint + return result, nil +} diff --git a/pkg/primitives/statistics.go b/pkg/primitives/statistics.go new file mode 100644 index 00000000..cc9d09a0 --- /dev/null +++ b/pkg/primitives/statistics.go @@ -0,0 +1,345 @@ +package primitives + +import ( + "context" + "encoding/json" + "fmt" + "os/exec" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/shirou/gopsutil/mem" + "github.com/threefoldtech/zos4/pkg" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/capacity" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/kernel" +) + +type ( + currentCapacityKey struct{} +) + +// GetCapacity gets current capacity from context +func GetCapacity(ctx context.Context) gridtypes.Capacity { + val := ctx.Value(currentCapacityKey{}) + if val == nil { + panic("no current capacity 
injected") + } + + return val.(gridtypes.Capacity) +} + +var _ provision.Provisioner = (*Statistics)(nil) + +type Reserved func() (gridtypes.Capacity, error) + +// Statistics a provisioner interceptor that keeps track +// of consumed capacity. It also does validate of required +// capacity and then can report that this capacity can not be fulfilled +type Statistics struct { + inner provision.Provisioner + total gridtypes.Capacity + reserved Reserved + storage provision.Storage + mem gridtypes.Unit +} + +// NewStatistics creates a new statistics provisioner interceptor. +// Statistics provisioner keeps track of used capacity and update explorer when it changes +func NewStatistics(total gridtypes.Capacity, storage provision.Storage, reserved Reserved, inner provision.Provisioner) *Statistics { + vm, err := mem.VirtualMemory() + if err != nil { + panic(err) + } + + if reserved == nil { + reserved = func() (gridtypes.Capacity, error) { + return gridtypes.Capacity{}, nil + } + } + + return &Statistics{ + inner: inner, + total: total, + reserved: reserved, + storage: storage, + mem: gridtypes.Unit(vm.Total), + } +} + +type activeCounters struct { + // used capacity from storage + reserved + cap gridtypes.Capacity + // Total deployments count + deployments int + // Total workloads count + workloads int + // last deployment timestamp + lastDeploymentTimestamp gridtypes.Timestamp +} + +// Get all used capacity from storage + reserved / deployments count and workloads count +func (s *Statistics) active(exclude ...provision.Exclude) (activeCounters, error) { + storageCap, err := s.storage.Capacity(exclude...) + if err != nil { + return activeCounters{}, err + } + reserved, err := s.reserved() + if err != nil { + return activeCounters{}, err + } + storageCap.Cap.Add(&reserved) + + return activeCounters{ + storageCap.Cap, + len(storageCap.Deployments), + storageCap.Workloads, + storageCap.LastDeploymentTimestamp, + }, err +} + +// Total returns the node total capacity +func (s *Statistics) Total() gridtypes.Capacity { + return s.total +} + +// getUsableMemoryBytes returns the used capacity by *reservations* and usable free memory. for the memory +// it takes into account reserved memory for the system +// excluding (not including it as 'used' any workload or deployment that matches the exclusion list) +func (s *Statistics) getUsableMemoryBytes(exclude ...provision.Exclude) (gridtypes.Capacity, gridtypes.Unit, error) { + // [ ] + // [[R][ WL ] ] + // [[ actual ] ] + + activeCounters, err := s.active(exclude...) 
+ cap := activeCounters.cap + if err != nil { + return cap, 0, err + } + + m, err := mem.VirtualMemory() + if err != nil { + return cap, 0, err + } + + theoreticalUsed := cap.MRU + actualUsed := m.Total - m.Available + used := gridtypes.Max(theoreticalUsed, gridtypes.Unit(actualUsed)) + + usable := gridtypes.Unit(m.Total) - used + return cap, usable, nil +} + +func (s *Statistics) hasEnoughCapacity(wl *gridtypes.WorkloadWithID) (gridtypes.Capacity, error) { + required, err := wl.Capacity() + if err != nil { + return gridtypes.Capacity{}, errors.Wrap(err, "failed to calculate workload needed capacity") + } + + // get used capacity by ALL workloads excluding this workload + // we do that by providing an exclusion list + used, usable, err := s.getUsableMemoryBytes(func(dl_ *gridtypes.Deployment, wl_ *gridtypes.Workload) bool { + id, _ := gridtypes.NewWorkloadID(dl_.TwinID, dl_.ContractID, wl_.Name) + return id == wl.ID + }) + if err != nil { + return used, errors.Wrap(err, "failed to get available memory") + } + + if required.MRU > usable { + return used, fmt.Errorf("cannot fulfil required memory size %d bytes out of usable %d bytes", required.MRU, usable) + } + + // check other resources as well? + return used, nil +} + +// Initialize implements provisioner interface +func (s *Statistics) Initialize(ctx context.Context) error { + return s.inner.Initialize(ctx) +} + +// Provision implements the provisioner interface +func (s *Statistics) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (result gridtypes.Result, err error) { + current, err := s.hasEnoughCapacity(wl) + if err != nil { + return result, errors.Wrap(err, "failed to satisfy required capacity") + } + + ctx = context.WithValue(ctx, currentCapacityKey{}, current) + return s.inner.Provision(ctx, wl) +} + +// Decommission implements the decomission interface +func (s *Statistics) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + return s.inner.Deprovision(ctx, wl) +} + +// Update implements the provisioner interface +func (s *Statistics) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) { + return s.inner.Update(ctx, wl) +} + +// CanUpdate implements the provisioner interface +func (s *Statistics) CanUpdate(ctx context.Context, typ gridtypes.WorkloadType) bool { + return s.inner.CanUpdate(ctx, typ) +} + +func (s *Statistics) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) { + return s.inner.Pause(ctx, wl) +} + +func (s *Statistics) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) { + return s.inner.Resume(ctx, wl) +} + +type statsStream struct { + stats *Statistics +} + +func NewStatisticsStream(s *Statistics) pkg.Statistics { + return &statsStream{s} +} + +func (s *statsStream) ReservedStream(ctx context.Context) <-chan gridtypes.Capacity { + ch := make(chan gridtypes.Capacity) + go func(ctx context.Context) { + defer close(ch) + for { + select { + case <-ctx.Done(): + return + case <-time.After(2 * time.Minute): + activeCounters, err := s.stats.active() + if err != nil { + log.Error().Err(err).Msg("failed to get used capacity") + } + ch <- activeCounters.cap + } + } + }(ctx) + return ch +} + +func (s *statsStream) Current() (gridtypes.Capacity, error) { + activeCounters, err := s.stats.active() + return activeCounters.cap, err +} + +func (s *statsStream) Total() gridtypes.Capacity { + return s.stats.Total() +} + +func (s *statsStream) Workloads() (int, error) { + capacity, err := 
s.stats.storage.Capacity() + if err != nil { + return 0, err + } + return capacity.Workloads, nil +} + +func (s *statsStream) GetCounters() (pkg.Counters, error) { + activeCounters, err := s.stats.active() + if err != nil { + return pkg.Counters{}, err + } + + reserved, err := s.stats.reserved() + if err != nil { + return pkg.Counters{}, err + } + + conn, err := s.openConnectionsCount() + if err != nil { + return pkg.Counters{}, err + } + return pkg.Counters{ + Total: s.stats.Total(), + Used: activeCounters.cap, + System: reserved, + OpenConnecions: conn, + Users: pkg.UsersCounters{ + Deployments: activeCounters.deployments, + Workloads: activeCounters.workloads, + LastDeploymentTimestamp: activeCounters.lastDeploymentTimestamp, + }, + }, nil +} + +func (s *statsStream) ListGPUs() ([]pkg.GPUInfo, error) { + usedGpus := func() (map[string]uint64, error) { + gpus := make(map[string]uint64) + active, err := s.stats.storage.Capacity() + if err != nil { + return nil, err + } + for _, dl := range active.Deployments { + for _, wl := range dl.Workloads { + if wl.Type != zos.ZMachineType { + continue + } + var vm zos.ZMachine + if err := json.Unmarshal(wl.Data, &vm); err != nil { + return nil, errors.Wrapf(err, "invalid workload data (%d.%s)", dl.ContractID, wl.Name) + } + + for _, gpu := range vm.GPU { + gpus[string(gpu)] = dl.ContractID + } + } + } + return gpus, nil + } + var list []pkg.GPUInfo + if kernel.GetParams().IsGPUDisabled() { + return list, nil + } + devices, err := capacity.ListPCI(capacity.GPU) + if err != nil { + return nil, errors.Wrap(err, "failed to list available devices") + } + + used, err := usedGpus() + if err != nil { + return nil, errors.Wrap(err, "failed to list used gpus") + } + + for _, pciDevice := range devices { + id := pciDevice.ShortID() + info := pkg.GPUInfo{ + ID: id, + Vendor: "unknown", + Device: "unknown", + Contract: used[id], + } + + vendor, device, ok := pciDevice.GetDevice() + if ok { + info.Vendor = vendor.Name + info.Device = device.Name + } + + subdevice, ok := pciDevice.GetSubdevice() + if ok { + info.Device = subdevice.Name + } + + list = append(list, info) + } + + return list, nil +} + +func (s *statsStream) openConnectionsCount() (int, error) { + cmd := exec.Command("/bin/sh", "-c", "ss -tnH state established | wc -l") + out, err := cmd.Output() + if err != nil { + return 0, err + } + return strconv.Atoi(strings.TrimSpace(string(out))) +} diff --git a/pkg/primitives/vm-light/gpu.go b/pkg/primitives/vm-light/gpu.go new file mode 100644 index 00000000..e1122088 --- /dev/null +++ b/pkg/primitives/vm-light/gpu.go @@ -0,0 +1,163 @@ +package vmlight + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zosbase/pkg/capacity" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/kernel" +) + +const ( + sysDeviceBase = "/sys/bus/pci/devices" + vfioPCIModue = "vfio-pci" +) + +var ( + modules = []string{"vfio", "vfio-pci", "vfio_iommu_type1"} +) + +func (m *Manager) initGPUVfioModules() error { + for _, mod := range modules { + if err := exec.Command("modprobe", mod).Run(); err != nil { + return errors.Wrapf(err, "failed to probe module: %s", mod) + } + } + + // also set unsafe interrupts + if err := os.WriteFile("/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts", []byte{'1'}, 0644); err != nil { + return errors.Wrapf(err, "failed to set allow_unsafe_interrupts for vfio") + } + + return nil +} + +// unbindBootVga is a 
helper method to disconnect the boot vga if needed +func (m *Manager) unbindBootVga() error { + const vtConsole = "/sys/class/vtconsole" + vts, err := os.ReadDir(vtConsole) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to list VTs") + } + for _, vt := range vts { + if err := os.WriteFile(filepath.Join(vtConsole, vt.Name(), "bind"), []byte("0"), 0644); err != nil { + // log or return ? + return errors.Wrapf(err, "failed to unbind vt '%s'", vt.Name()) + } + } + + if err := os.WriteFile("/sys/bus/platform/drivers/efi-framebuffer/unbind", []byte("efi-framebuffer.0"), 0644); err != nil { + log.Warn().Err(err).Msg("failed to disable frame-buffer") + } + + return nil +} + +// this function will make sure ALL gpus are bind to the right driver +func (m *Manager) initGPUs() error { + if kernel.GetParams().IsGPUDisabled() { + return nil + } + if err := m.initGPUVfioModules(); err != nil { + return err + } + + gpus, err := capacity.ListPCI(capacity.GPU) + if err != nil { + return errors.Wrap(err, "failed to list system GPUs") + } + + for _, gpu := range gpus { + bootVga, err := gpu.Flag("boot_vga") + if err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "failed to read GPU '%s' boot_vga flag", gpu.Slot) + } + + if bootVga > 0 { + if err := m.unbindBootVga(); err != nil { + log.Warn().Err(err).Msg("error while unbinding boot vga") + } + } + + devices, err := capacity.IoMMUGroup(gpu, capacity.Not(capacity.PCIBridge)) + if err != nil { + return errors.Wrapf(err, "failed to list devices in iommu group for '%s'", gpu.Slot) + } + + for _, pci := range devices { + device := filepath.Join(sysDeviceBase, pci.Slot) + driver := filepath.Join(device, "driver") + ln, err := os.Readlink(driver) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to check device driver") + } + + driverName := filepath.Base(ln) + //note: Base return `.` if path is empty string + if driverName == vfioPCIModue { + // correct driver is bind to the device + continue + } else if driverName != "." { + // another driver is bind to this device! + // this should not happen but we need to be sure + // let's unbind + + if err := os.WriteFile(filepath.Join(driver, "unbind"), []byte(pci.Slot), 0600); err != nil { + return errors.Wrapf(err, "failed to unbind gpu '%s' from driver '%s'", pci.ShortID(), driverName) + } + } + + // we then need to do an override + if err := os.WriteFile(filepath.Join(device, "driver_override"), []byte(vfioPCIModue), 0644); err != nil { + return errors.Wrapf(err, "failed to override the device '%s' driver", pci.Slot) + } + + if err := os.WriteFile("/sys/bus/pci/drivers_probe", []byte(pci.Slot), 0200); err != nil { + return errors.Wrapf(err, "failed to bind device '%s' to vfio", pci.Slot) + } + } + } + + return nil +} + +// expandGPUs expands the set of provided GPUs with all devices in the IoMMU group. 
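+// (a discrete GPU typically exposes more than one PCI function, for instance the
+// video controller together with an HDMI audio device, and those functions usually
+// share a single IOMMU group)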
+// It's required that all devices in an iommu group to be passed together to a VM +// hence we need that for each GPU in the list add all the devices from each device +// IOMMU group +func (m *Manager) expandGPUs(gpus []zos.GPU) ([]capacity.PCI, error) { + if kernel.GetParams().IsGPUDisabled() { + return nil, fmt.Errorf("GPU is disabled on this node") + } + all, err := capacity.ListPCI(capacity.GPU) + if err != nil { + return nil, errors.Wrap(err, "failed to list available GPUs") + } + + allMap := make(map[string]capacity.PCI) + for _, device := range all { + allMap[device.ShortID()] = device + } + + var devices []capacity.PCI + for _, gpu := range gpus { + device, ok := allMap[string(gpu)] + if !ok { + return nil, fmt.Errorf("unknown GPU id '%s'", gpu) + } + + sub, err := capacity.IoMMUGroup(device, capacity.Not(capacity.PCIBridge)) + if err != nil { + return nil, errors.Wrapf(err, "failed to list all devices belonging to '%s'", device.Slot) + } + + devices = append(devices, sub...) + } + + return devices, nil +} diff --git a/pkg/primitives/vm-light/pause.go b/pkg/primitives/vm-light/pause.go new file mode 100644 index 00000000..79d25484 --- /dev/null +++ b/pkg/primitives/vm-light/pause.go @@ -0,0 +1,29 @@ +package vmlight + +import ( + "context" + + provision "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +func (m *Manager) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vm := stubs.NewVMModuleStub(m.zbus) + + if err := vm.Lock(ctx, wl.ID.String(), true); err != nil { + return provision.UnChanged(err) + } + + return provision.Paused() +} + +func (m *Manager) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vm := stubs.NewVMModuleStub(m.zbus) + + if err := vm.Lock(ctx, wl.ID.String(), false); err != nil { + return provision.UnChanged(err) + } + + return provision.Ok() +} diff --git a/pkg/primitives/vm-light/utils.go b/pkg/primitives/vm-light/utils.go new file mode 100644 index 00000000..123f33c4 --- /dev/null +++ b/pkg/primitives/vm-light/utils.go @@ -0,0 +1,380 @@ +package vmlight + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + "strings" + + "github.com/BurntSushi/toml" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +// fill up the VM (machine) object with write boot config for a full virtual machine (with a disk image) +func (p *Manager) prepVirtualMachine( + ctx context.Context, + cloudImage string, + imageInfo FListInfo, + machine *pkg.VM, + config *zos.ZMachineLight, + deployment *gridtypes.Deployment, + wl *gridtypes.WorkloadWithID, +) error { + storage := stubs.NewStorageModuleStub(p.zbus) + // if a VM the vm has to have at least one mount + if len(config.Mounts) == 0 { + return fmt.Errorf("at least one mount has to be attached for Vm mode") + } + + machine.KernelImage = filepath.Join(cloudImage, "hypervisor-fw") + disk, err := deployment.Get(config.Mounts[0].Name) + if err != nil { + return err + } + + if disk.Type != zos.ZMountType { + return fmt.Errorf("mount is not a valid disk workload") + } + + if disk.Result.State != gridtypes.StateOk { + return fmt.Errorf("boot disk was not deployed correctly") + } + + info, err := storage.DiskLookup(ctx, disk.ID.String()) + if err != nil { + return errors.Wrap(err, "disk does 
not exist") + } + + // TODO: DiskWrite will not override the disk if it already has a partition table + // or a filesystem. this means that if later the disk is assigned to a new VM with + // a different flist it will have the same old operating system copied from previous + // setup. + if err = storage.DiskWrite(ctx, disk.ID.String(), imageInfo.ImagePath); err != nil { + return errors.Wrap(err, "failed to write image to disk") + } + + machine.Boot = pkg.Boot{ + Type: pkg.BootDisk, + Path: info.Path, + } + + return p.vmMounts(ctx, deployment, config.Mounts[1:], false, machine) +} + +// prepare the machine and fill it up with proper boot flags for a container VM +func (p *Manager) prepContainer( + ctx context.Context, + cloudImage string, + imageInfo FListInfo, + machine *pkg.VM, + config *zos.ZMachineLight, + deployment *gridtypes.Deployment, + wl *gridtypes.WorkloadWithID, +) error { + // - if Container, remount RW + // prepare for container + var ( + storage = stubs.NewStorageModuleStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + ) + + if err := flist.Unmount(ctx, wl.ID.String()); err != nil { + return errors.Wrapf(err, "failed to unmount flist: %s", wl.ID.String()) + } + rootfsSize := config.RootSize() + // create a persisted volume for the vm. we don't do it automatically + // via the flist, so we have control over when to decomission this volume. + // remounting in RW mode + volName := fmt.Sprintf("rootfs:%s", wl.ID.String()) + + volumeExists, err := storage.VolumeExists(ctx, volName) + if err != nil { + return errors.Wrap(err, "failed to check if vm rootfs exists") + } + + volume, err := storage.VolumeCreate(ctx, volName, rootfsSize) + if err != nil { + return errors.Wrap(err, "failed to create vm rootfs") + } + + defer func() { + if err != nil { + // vm creation failed, + if err := storage.VolumeDelete(ctx, volName); err != nil { + log.Error().Err(err).Str("volume", volName).Msg("failed to delete persisted volume") + } + } + }() + + mnt, err := flist.Mount(ctx, wl.ID.String(), config.FList, pkg.MountOptions{ + ReadOnly: false, + PersistedVolume: volume.Path, + }) + if err != nil { + return errors.Wrapf(err, "failed to mount flist: %s", wl.ID.String()) + } + + // clean up host keys + if !volumeExists { + files, err := filepath.Glob(filepath.Join(mnt, "etc", "ssh", "ssh_host_*")) + if err != nil { + log.Debug().Err(err).Msg("failed to list ssh host keys for a vm image") + } + + for _, file := range files { + if err := os.Remove(file); err != nil { + log.Debug().Err(err).Str("file", file).Msg("failed to delete host key file") + } + } + } + + // inject container kernel and init + machine.KernelImage = filepath.Join(cloudImage, "kernel") + machine.InitrdImage = filepath.Join(cloudImage, "initramfs-linux.img") + + // can be overridden from the flist itself if exists + if len(imageInfo.KernelPath) != 0 { + machine.KernelImage = imageInfo.KernelPath + machine.InitrdImage = imageInfo.InitrdPath + // we are using kernel from flist, we need to respect + // user init + if len(config.Entrypoint) != 0 { + machine.KernelArgs["init"] = config.Entrypoint + } + } + + machine.Boot = pkg.Boot{ + Type: pkg.BootVirtioFS, + Path: mnt, + } + + if err := fListStartup(config, filepath.Join(mnt, ".startup.toml")); err != nil { + return errors.Wrap(err, "failed to apply startup config from flist") + } + + if err := p.vmMounts(ctx, deployment, config.Mounts, true, machine); err != nil { + return err + } + if config.Corex { + if err := p.copyFile("/usr/bin/corex", filepath.Join(mnt, "corex"), 0755); err != 
nil { + return errors.Wrap(err, "failed to inject corex binary") + } + machine.Entrypoint = "/corex --ipv6 -d 7 --interface eth0" + } + + return nil +} + +func (p *Manager) newMyceliumNetworkInterface(ctx context.Context, dl gridtypes.Deployment, wl *gridtypes.WorkloadWithID, config *zos.MyceliumIP) (pkg.VMIface, error) { + network := stubs.NewNetworkerLightStub(p.zbus) + netID := zos.NetworkID(dl.TwinID, config.Network) + + tapName := wl.ID.Unique(string(config.Network)) + iface, err := network.AttachMycelium(ctx, string(netID), tapName, config.Seed) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device") + } + + out := pkg.VMIface{ + Tap: iface.Name, + MAC: iface.Mac.String(), + IPs: []net.IPNet{ + *iface.IP, + }, + Routes: iface.Routes, + PublicIPv4: false, + PublicIPv6: false, + } + + return out, nil +} + +func (p *Manager) newPrivNetworkInterface(ctx context.Context, dl gridtypes.Deployment, wl *gridtypes.WorkloadWithID, inf zos.MachineInterface) (pkg.VMIface, error) { + network := stubs.NewNetworkerLightStub(p.zbus) + netID := zos.NetworkID(dl.TwinID, inf.Network) + + tapName := wl.ID.Unique(string(inf.Network)) + iface, err := network.AttachPrivate(ctx, string(netID), tapName, inf.IP) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device for private interface") + } + + out := pkg.VMIface{ + Tap: iface.Name, + MAC: iface.Mac.String(), + IPs: []net.IPNet{ + *iface.IP, + // privIP6, + }, + Routes: iface.Routes, + IP4DefaultGateway: iface.Routes[0].Gateway, + // IP6DefaultGateway: gw6, + PublicIPv4: false, + PublicIPv6: false, + NetID: netID, + } + + return out, nil +} + +// FListInfo virtual machine details +type FListInfo struct { + ImagePath string + KernelPath string + InitrdPath string +} + +func (t *FListInfo) IsContainer() bool { + return len(t.ImagePath) == 0 +} + +func getFlistInfo(flistPath string) (flist FListInfo, err error) { + files := map[string]*string{ + "/image.raw": &flist.ImagePath, + "/boot/vmlinuz": &flist.KernelPath, + "/boot/initrd.img": &flist.InitrdPath, + } + + for rel, ptr := range files { + path := filepath.Join(flistPath, rel) + // this path can be a symlink so we need to make sure + // the symlink is pointing to only files inside the + // flist. + + // but we need to validate + stat, err := os.Stat(path) + if os.IsNotExist(err) { + continue + } else if err != nil { + return flist, errors.Wrapf(err, "couldn't stat %s", rel) + } + + if stat.IsDir() { + return flist, fmt.Errorf("path '%s' cannot be a directory", rel) + } + mod := stat.Mode() + switch mod.Type() { + case 0: + // regular file, do nothing + case os.ModeSymlink: + // this is a symlink. 
we + // need to make sure it's a safe link + // to a location inside the flist + link, err := os.Readlink(path) + if err != nil { + return flist, errors.Wrapf(err, "failed to read link at '%s", rel) + } + // now the link if joined with path, (and cleaned) need to also point + // to somewhere under flistPath + abs := filepath.Clean(filepath.Join(flistPath, link)) + if !strings.HasPrefix(abs, flistPath) { + return flist, fmt.Errorf("path '%s' points to invalid location", rel) + } + default: + return flist, fmt.Errorf("path '%s' is of invalid type: %s", rel, mod.Type().String()) + } + + // set the value + *ptr = path + } + + return flist, nil +} + +type startup struct { + Entries map[string]entry `toml:"startup"` +} + +type entry struct { + Name string + Args args +} + +type args struct { + Name string + Dir string + Args []string + Env map[string]string +} + +func (e entry) Entrypoint() string { + if e.Name == "core.system" || + e.Name == "core.base" && e.Args.Name != "" { + var buf strings.Builder + + buf.WriteString(e.Args.Name) + for _, arg := range e.Args.Args { + buf.WriteRune(' ') + arg = strings.Replace(arg, "\"", "\\\"", -1) + buf.WriteRune('"') + buf.WriteString(arg) + buf.WriteRune('"') + } + + return buf.String() + } + + return "" +} + +func (e entry) WorkingDir() string { + return e.Args.Dir +} + +func (e entry) Envs() map[string]string { + return e.Args.Env +} + +// This code is backward compatible with flist .startup.toml file +// where the flist can define an Entrypoint and some initial environment +// variables. this is used *with* the container configuration like this +// - if no zmachine entry point is defined, use the one from .startup.toml +// - if envs are defined in flist, merge with the env variables from the +func fListStartup(data *zos.ZMachineLight, path string) error { + f, err := os.Open(path) + if os.IsNotExist(err) { + return nil + } else if err != nil { + return errors.Wrapf(err, "failed to load startup file '%s'", path) + } + + defer f.Close() + + log.Info().Msg("startup file found") + startup := startup{} + if _, err := toml.NewDecoder(f).Decode(&startup); err != nil { + return err + } + + entry, ok := startup.Entries["entry"] + if !ok { + return nil + } + + data.Env = mergeEnvs(entry.Envs(), data.Env) + + if data.Entrypoint == "" && entry.Entrypoint() != "" { + data.Entrypoint = entry.Entrypoint() + } + return nil +} + +// mergeEnvs new into base +func mergeEnvs(base, new map[string]string) map[string]string { + if len(base) == 0 { + return new + } + + for k, v := range new { + base[k] = v + } + + return base +} diff --git a/pkg/primitives/vm-light/vm.go b/pkg/primitives/vm-light/vm.go new file mode 100644 index 00000000..dd4be22e --- /dev/null +++ b/pkg/primitives/vm-light/vm.go @@ -0,0 +1,309 @@ +package vmlight + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "os" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +const ( + cloudContainerFlist = "https://hub.grid.tf/tf-autobuilder/cloud-container-9dba60e.flist" + cloudContainerName = "cloud-container" +) + +// ZMachine type +type ZMachine = zos.ZMachineLight + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Initializer = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + 
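+// NewManager returns the zmachine-light workload manager. It talks to the vmd,
+// flist, storage and network-light modules over the supplied zbus client and is
+// registered by the provisioner for the ZMachineLight workload type.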
+func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +func (m *Manager) Initialize(ctx context.Context) error { + return m.initGPUs() +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return p.virtualMachineProvisionImpl(ctx, wl) +} + +func (p *Manager) vmMounts(ctx context.Context, deployment *gridtypes.Deployment, mounts []zos.MachineMount, format bool, vm *pkg.VM) error { + for _, mount := range mounts { + wl, err := deployment.Get(mount.Name) + if err != nil { + return errors.Wrapf(err, "failed to get mount '%s' workload", mount.Name) + } + if wl.Result.State != gridtypes.StateOk { + return fmt.Errorf("invalid disk '%s' state", mount.Name) + } + switch wl.Type { + case zos.ZMountType: + if err := p.mountDisk(ctx, wl, mount, format, vm); err != nil { + return err + } + case zos.QuantumSafeFSType: + if err := p.mountQsfs(wl, mount, vm); err != nil { + return err + } + case zos.VolumeType: + if err := p.mountVolume(ctx, wl, mount, vm); err != nil { + return err + } + default: + return fmt.Errorf("expecting a reservation of type '%s' or '%s' for disk '%s'", zos.ZMountType, zos.QuantumSafeFSType, mount.Name) + } + } + return nil +} + +func (p *Manager) mountDisk(ctx context.Context, wl *gridtypes.WorkloadWithID, mount zos.MachineMount, format bool, vm *pkg.VM) error { + storage := stubs.NewStorageModuleStub(p.zbus) + + info, err := storage.DiskLookup(ctx, wl.ID.String()) + if err != nil { + return errors.Wrapf(err, "failed to inspect disk '%s'", mount.Name) + } + + if format { + if err := storage.DiskFormat(ctx, wl.ID.String()); err != nil { + return errors.Wrap(err, "failed to prepare mount") + } + } + + vm.Disks = append(vm.Disks, pkg.VMDisk{Path: info.Path, Target: mount.Mountpoint}) + + return nil +} + +func (p *Manager) mountVolume(ctx context.Context, wl *gridtypes.WorkloadWithID, mount zos.MachineMount, vm *pkg.VM) error { + storage := stubs.NewStorageModuleStub(p.zbus) + + volume, err := storage.VolumeLookup(ctx, wl.ID.String()) + if err != nil { + return fmt.Errorf("failed to lookup volume %q: %w", wl.ID.String(), err) + } + + vm.Shared = append(vm.Shared, pkg.SharedDir{ID: wl.Name.String(), Path: volume.Path, Target: mount.Mountpoint}) + return nil +} + +func (p *Manager) mountQsfs(wl *gridtypes.WorkloadWithID, mount zos.MachineMount, vm *pkg.VM) error { + var info zos.QuatumSafeFSResult + if err := wl.Result.Unmarshal(&info); err != nil { + return fmt.Errorf("invalid qsfs result '%s': %w", mount.Name, err) + } + + vm.Shared = append(vm.Shared, pkg.SharedDir{ID: wl.Name.String(), Path: info.Path, Target: mount.Mountpoint}) + return nil +} + +func (p *Manager) virtualMachineProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (result zos.ZMachineLightResult, err error) { + var ( + network = stubs.NewNetworkerLightStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + vm = stubs.NewVMModuleStub(p.zbus) + + config ZMachine + ) + if vm.Exists(ctx, wl.ID.String()) { + return result, provision.ErrNoActionNeeded + } + + if err := json.Unmarshal(wl.Data, &config); err != nil { + return result, errors.Wrap(err, "failed to decode reservation schema") + } + + if len(config.GPU) != 0 && !provision.IsRentedNode(ctx) { + // you cannot use GPU unless this is a rented node + return result, fmt.Errorf("usage of GPU is not allowed unless node is rented") + } + + machine := pkg.VM{ + Name: wl.ID.String(), + CPU: config.ComputeCapacity.CPU, + Memory: config.ComputeCapacity.Memory, + Entrypoint: 
config.Entrypoint, + KernelArgs: pkg.KernelArgs{}, + } + + // expand GPUs + devices, err := p.expandGPUs(config.GPU) + if err != nil { + return result, errors.Wrap(err, "failed to prepare requested gpu device(s)") + } + + for _, device := range devices { + machine.Devices = append(machine.Devices, device.Slot) + } + + // the config is validated by the engine. we now only support only one + // private network + if len(config.Network.Interfaces) != 1 { + return result, fmt.Errorf("only one private network is support") + } + netConfig := config.Network.Interfaces[0] + + result.ID = wl.ID.String() + result.IP = netConfig.IP.String() + + deployment, err := provision.GetDeployment(ctx) + if err != nil { + return result, errors.Wrap(err, "failed to get deployment") + } + networkInfo := pkg.VMNetworkInfo{ + Nameservers: []net.IP{net.ParseIP("8.8.8.8"), net.ParseIP("1.1.1.1"), net.ParseIP("2001:4860:4860::8888")}, + } + + defer func() { + tapName := wl.ID.Unique(string(config.Network.Mycelium.Network)) + if err != nil { + _ = network.Detach(ctx, tapName) + } + }() + + for _, nic := range config.Network.Interfaces { + inf, err := p.newPrivNetworkInterface(ctx, deployment, wl, nic) + if err != nil { + return result, err + } + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + } + + if config.Network.Mycelium != nil { + inf, err := p.newMyceliumNetworkInterface(ctx, deployment, wl, config.Network.Mycelium) + if err != nil { + return result, err + } + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + result.MyceliumIP = inf.IPs[0].IP.String() + } + // - mount flist RO + mnt, err := flist.Mount(ctx, wl.ID.String(), config.FList, pkg.ReadOnlyMountOptions) + if err != nil { + return result, errors.Wrapf(err, "failed to mount flist: %s", wl.ID.String()) + } + + var imageInfo FListInfo + // - detect type (container or VM) + imageInfo, err = getFlistInfo(mnt) + if err != nil { + return result, err + } + + log.Debug().Msgf("detected flist type: %+v", imageInfo) + + // mount cloud-container flist (or reuse) which has kernel, initrd and also firmware + hash, err := flist.FlistHash(ctx, cloudContainerFlist) + if err != nil { + return zos.ZMachineLightResult{}, errors.Wrap(err, "failed to get cloud-container flist hash") + } + + // if the name changes (because flist changed, a new mount will be created) + name := fmt.Sprintf("%s:%s", cloudContainerName, hash) + // now mount cloud image also + cloudImage, err := flist.Mount(ctx, name, cloudContainerFlist, pkg.ReadOnlyMountOptions) + if err != nil { + return result, errors.Wrap(err, "failed to mount cloud container base image") + } + + if imageInfo.IsContainer() { + if err = p.prepContainer(ctx, cloudImage, imageInfo, &machine, &config, &deployment, wl); err != nil { + return result, err + } + } else { + if err = p.prepVirtualMachine(ctx, cloudImage, imageInfo, &machine, &config, &deployment, wl); err != nil { + return result, err + } + } + + // - Attach mounts + // - boot + machine.Network = networkInfo + machine.Environment = config.Env + machine.Hostname = wl.Name.String() + + machineInfo, err := vm.Run(ctx, machine) + if err != nil { + // attempt to delete the vm, should the process still be lingering + log.Error().Err(err).Msg("cleaning up vm deployment duo to an error") + _ = vm.Delete(ctx, wl.ID.String()) + } + result.ConsoleURL = machineInfo.ConsoleURL + return result, err +} + +func (p *Manager) copyFile(srcPath string, destPath string, permissions os.FileMode) error { + src, err := os.Open(srcPath) + if err != nil { + return errors.Wrapf(err, 
"Coludn't find %s on the node", srcPath) + } + defer src.Close() + dest, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, permissions) + if err != nil { + return errors.Wrapf(err, "Coludn't create %s file", destPath) + } + defer dest.Close() + _, err = io.Copy(dest, src) + if err != nil { + return errors.Wrapf(err, "Couldn't copy to %s", destPath) + } + return nil +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + var ( + flist = stubs.NewFlisterStub(p.zbus) + network = stubs.NewNetworkerLightStub(p.zbus) + vm = stubs.NewVMModuleStub(p.zbus) + storage = stubs.NewStorageModuleStub(p.zbus) + + cfg ZMachine + ) + + if err := json.Unmarshal(wl.Data, &cfg); err != nil { + return errors.Wrap(err, "failed to decode reservation schema") + } + + if _, err := vm.Inspect(ctx, wl.ID.String()); err == nil { + if err := vm.Delete(ctx, wl.ID.String()); err != nil { + return errors.Wrapf(err, "failed to delete vm %s", wl.ID) + } + } + + if err := flist.Unmount(ctx, wl.ID.String()); err != nil { + log.Error().Err(err).Msg("failed to unmount machine flist") + } + + volName := fmt.Sprintf("rootfs:%s", wl.ID.String()) + if err := storage.VolumeDelete(ctx, volName); err != nil { + log.Error().Err(err).Str("name", volName).Msg("failed to delete rootfs volume") + } + + tapName := wl.ID.Unique(string(cfg.Network.Mycelium.Network)) + + if err := network.Detach(ctx, tapName); err != nil { + return errors.Wrap(err, "could not clean up tap device") + } + + return nil +} diff --git a/pkg/primitives/vm/gpu.go b/pkg/primitives/vm/gpu.go new file mode 100644 index 00000000..1d64f95d --- /dev/null +++ b/pkg/primitives/vm/gpu.go @@ -0,0 +1,163 @@ +package vm + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zosbase/pkg/capacity" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/kernel" +) + +const ( + sysDeviceBase = "/sys/bus/pci/devices" + vfioPCIModue = "vfio-pci" +) + +var ( + modules = []string{"vfio", "vfio-pci", "vfio_iommu_type1"} +) + +func (m *Manager) initGPUVfioModules() error { + for _, mod := range modules { + if err := exec.Command("modprobe", mod).Run(); err != nil { + return errors.Wrapf(err, "failed to probe module: %s", mod) + } + } + + // also set unsafe interrupts + if err := os.WriteFile("/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts", []byte{'1'}, 0644); err != nil { + return errors.Wrapf(err, "failed to set allow_unsafe_interrupts for vfio") + } + + return nil +} + +// unbindBootVga is a helper method to disconnect the boot vga if needed +func (m *Manager) unbindBootVga() error { + const vtConsole = "/sys/class/vtconsole" + vts, err := os.ReadDir(vtConsole) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to list VTs") + } + for _, vt := range vts { + if err := os.WriteFile(filepath.Join(vtConsole, vt.Name(), "bind"), []byte("0"), 0644); err != nil { + // log or return ? 
+ return errors.Wrapf(err, "failed to unbind vt '%s'", vt.Name()) + } + } + + if err := os.WriteFile("/sys/bus/platform/drivers/efi-framebuffer/unbind", []byte("efi-framebuffer.0"), 0644); err != nil { + log.Warn().Err(err).Msg("failed to disable frame-buffer") + } + + return nil +} + +// this function will make sure ALL gpus are bind to the right driver +func (m *Manager) initGPUs() error { + if kernel.GetParams().IsGPUDisabled() { + return nil + } + if err := m.initGPUVfioModules(); err != nil { + return err + } + + gpus, err := capacity.ListPCI(capacity.GPU) + if err != nil { + return errors.Wrap(err, "failed to list system GPUs") + } + + for _, gpu := range gpus { + bootVga, err := gpu.Flag("boot_vga") + if err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "failed to read GPU '%s' boot_vga flag", gpu.Slot) + } + + if bootVga > 0 { + if err := m.unbindBootVga(); err != nil { + log.Warn().Err(err).Msg("error while unbinding boot vga") + } + } + + devices, err := capacity.IoMMUGroup(gpu, capacity.Not(capacity.PCIBridge)) + if err != nil { + return errors.Wrapf(err, "failed to list devices in iommu group for '%s'", gpu.Slot) + } + + for _, pci := range devices { + device := filepath.Join(sysDeviceBase, pci.Slot) + driver := filepath.Join(device, "driver") + ln, err := os.Readlink(driver) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to check device driver") + } + + driverName := filepath.Base(ln) + //note: Base return `.` if path is empty string + if driverName == vfioPCIModue { + // correct driver is bind to the device + continue + } else if driverName != "." { + // another driver is bind to this device! + // this should not happen but we need to be sure + // let's unbind + + if err := os.WriteFile(filepath.Join(driver, "unbind"), []byte(pci.Slot), 0600); err != nil { + return errors.Wrapf(err, "failed to unbind gpu '%s' from driver '%s'", pci.ShortID(), driverName) + } + } + + // we then need to do an override + if err := os.WriteFile(filepath.Join(device, "driver_override"), []byte(vfioPCIModue), 0644); err != nil { + return errors.Wrapf(err, "failed to override the device '%s' driver", pci.Slot) + } + + if err := os.WriteFile("/sys/bus/pci/drivers_probe", []byte(pci.Slot), 0200); err != nil { + return errors.Wrapf(err, "failed to bind device '%s' to vfio", pci.Slot) + } + } + } + + return nil +} + +// expandGPUs expands the set of provided GPUs with all devices in the IoMMU group. +// It's required that all devices in an iommu group to be passed together to a VM +// hence we need that for each GPU in the list add all the devices from each device +// IOMMU group +func (m *Manager) expandGPUs(gpus []zos.GPU) ([]capacity.PCI, error) { + if kernel.GetParams().IsGPUDisabled() { + return nil, fmt.Errorf("GPU is disabled on this node") + } + all, err := capacity.ListPCI(capacity.GPU) + if err != nil { + return nil, errors.Wrap(err, "failed to list available GPUs") + } + + allMap := make(map[string]capacity.PCI) + for _, device := range all { + allMap[device.ShortID()] = device + } + + var devices []capacity.PCI + for _, gpu := range gpus { + device, ok := allMap[string(gpu)] + if !ok { + return nil, fmt.Errorf("unknown GPU id '%s'", gpu) + } + + sub, err := capacity.IoMMUGroup(device, capacity.Not(capacity.PCIBridge)) + if err != nil { + return nil, errors.Wrapf(err, "failed to list all devices belonging to '%s'", device.Slot) + } + + devices = append(devices, sub...) 
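+		// every non-bridge function sharing this GPU's IOMMU group is now queued
+		// for passthrough together with the rest of the group.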
+ } + + return devices, nil +} diff --git a/pkg/primitives/vm/pause.go b/pkg/primitives/vm/pause.go new file mode 100644 index 00000000..ee9ed29d --- /dev/null +++ b/pkg/primitives/vm/pause.go @@ -0,0 +1,29 @@ +package vm + +import ( + "context" + + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +func (m *Manager) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vm := stubs.NewVMModuleStub(m.zbus) + + if err := vm.Lock(ctx, wl.ID.String(), true); err != nil { + return provision.UnChanged(err) + } + + return provision.Paused() +} + +func (m *Manager) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vm := stubs.NewVMModuleStub(m.zbus) + + if err := vm.Lock(ctx, wl.ID.String(), false); err != nil { + return provision.UnChanged(err) + } + + return provision.Ok() +} diff --git a/pkg/primitives/vm/utils.go b/pkg/primitives/vm/utils.go new file mode 100644 index 00000000..07d45ad8 --- /dev/null +++ b/pkg/primitives/vm/utils.go @@ -0,0 +1,532 @@ +package vm + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + "strings" + + "github.com/BurntSushi/toml" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zos4/pkg/primitives/pubip" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/network/ifaceutil" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var networkResourceNet = net.IPNet{ + IP: net.ParseIP("100.64.0.0"), + Mask: net.IPv4Mask(0xff, 0xff, 0, 0), +} + +// fill up the VM (machine) object with write boot config for a full virtual machine (with a disk image) +func (p *Manager) prepVirtualMachine( + ctx context.Context, + cloudImage string, + imageInfo FListInfo, + machine *pkg.VM, + config *zos.ZMachine, + deployment *gridtypes.Deployment, + wl *gridtypes.WorkloadWithID, +) error { + storage := stubs.NewStorageModuleStub(p.zbus) + // if a VM the vm has to have at least one mount + if len(config.Mounts) == 0 { + return fmt.Errorf("at least one mount has to be attached for Vm mode") + } + + machine.KernelImage = filepath.Join(cloudImage, "hypervisor-fw") + disk, err := deployment.Get(config.Mounts[0].Name) + if err != nil { + return err + } + + if disk.Type != zos.ZMountType { + return fmt.Errorf("mount is not a valid disk workload") + } + + if disk.Result.State != gridtypes.StateOk { + return fmt.Errorf("boot disk was not deployed correctly") + } + + info, err := storage.DiskLookup(ctx, disk.ID.String()) + if err != nil { + return errors.Wrap(err, "disk does not exist") + } + + // TODO: DiskWrite will not override the disk if it already has a partition table + // or a filesystem. this means that if later the disk is assigned to a new VM with + // a different flist it will have the same old operating system copied from previous + // setup. 
+ if err = storage.DiskWrite(ctx, disk.ID.String(), imageInfo.ImagePath); err != nil { + return errors.Wrap(err, "failed to write image to disk") + } + + machine.Boot = pkg.Boot{ + Type: pkg.BootDisk, + Path: info.Path, + } + + return p.vmMounts(ctx, deployment, config.Mounts[1:], false, machine) +} + +// prepare the machine and fill it up with proper boot flags for a container VM +func (p *Manager) prepContainer( + ctx context.Context, + cloudImage string, + imageInfo FListInfo, + machine *pkg.VM, + config *zos.ZMachine, + deployment *gridtypes.Deployment, + wl *gridtypes.WorkloadWithID, +) error { + // - if Container, remount RW + // prepare for container + var ( + storage = stubs.NewStorageModuleStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + ) + + if err := flist.Unmount(ctx, wl.ID.String()); err != nil { + return errors.Wrapf(err, "failed to unmount flist: %s", wl.ID.String()) + } + rootfsSize := config.RootSize() + // create a persisted volume for the vm. we don't do it automatically + // via the flist, so we have control over when to decomission this volume. + // remounting in RW mode + volName := fmt.Sprintf("rootfs:%s", wl.ID.String()) + + volumeExists, err := storage.VolumeExists(ctx, volName) + if err != nil { + return errors.Wrap(err, "failed to check if vm rootfs exists") + } + + volume, err := storage.VolumeCreate(ctx, volName, rootfsSize) + if err != nil { + return errors.Wrap(err, "failed to create vm rootfs") + } + + defer func() { + if err != nil { + // vm creation failed, + if err := storage.VolumeDelete(ctx, volName); err != nil { + log.Error().Err(err).Str("volume", volName).Msg("failed to delete persisted volume") + } + } + }() + + mnt, err := flist.Mount(ctx, wl.ID.String(), config.FList, pkg.MountOptions{ + ReadOnly: false, + PersistedVolume: volume.Path, + }) + if err != nil { + return errors.Wrapf(err, "failed to mount flist: %s", wl.ID.String()) + } + + // clean up host keys + if !volumeExists { + files, err := filepath.Glob(filepath.Join(mnt, "etc", "ssh", "ssh_host_*")) + if err != nil { + log.Debug().Err(err).Msg("failed to list ssh host keys for a vm image") + } + + for _, file := range files { + if err := os.Remove(file); err != nil { + log.Debug().Err(err).Str("file", file).Msg("failed to delete host key file") + } + } + } + + // inject container kernel and init + machine.KernelImage = filepath.Join(cloudImage, "kernel") + machine.InitrdImage = filepath.Join(cloudImage, "initramfs-linux.img") + + // can be overridden from the flist itself if exists + if len(imageInfo.KernelPath) != 0 { + machine.KernelImage = imageInfo.KernelPath + machine.InitrdImage = imageInfo.InitrdPath + // we are using kernel from flist, we need to respect + // user init + if len(config.Entrypoint) != 0 { + machine.KernelArgs["init"] = config.Entrypoint + } + } + + machine.Boot = pkg.Boot{ + Type: pkg.BootVirtioFS, + Path: mnt, + } + + if err := fListStartup(config, filepath.Join(mnt, ".startup.toml")); err != nil { + return errors.Wrap(err, "failed to apply startup config from flist") + } + + if err := p.vmMounts(ctx, deployment, config.Mounts, true, machine); err != nil { + return err + } + if config.Corex { + if err := p.copyFile("/usr/bin/corex", filepath.Join(mnt, "corex"), 0755); err != nil { + return errors.Wrap(err, "failed to inject corex binary") + } + machine.Entrypoint = "/corex --ipv6 -d 7 --interface eth0" + } + + return nil +} + +func (p *Manager) newMyceliumNetworkInterface(ctx context.Context, dl gridtypes.Deployment, wl *gridtypes.WorkloadWithID, config 
*zos.MyceliumIP) (pkg.VMIface, error) { + network := stubs.NewNetworkerStub(p.zbus) + netID := zos.NetworkID(dl.TwinID, config.Network) + + tapName := wl.ID.Unique("mycelium") + iface, err := network.SetupMyceliumTap(ctx, tapName, netID, *config) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device") + } + + out := pkg.VMIface{ + Tap: iface.Name, + MAC: iface.HW.String(), + IPs: []net.IPNet{ + iface.IP, + }, + Routes: []pkg.Route{ + { + Net: net.IPNet{ + IP: net.ParseIP("400::"), + Mask: net.CIDRMask(7, 128), + }, + Gateway: iface.Gateway.IP, + }, + }, + PublicIPv4: false, + PublicIPv6: false, + } + + return out, nil +} + +func (p *Manager) newYggNetworkInterface(ctx context.Context, wl *gridtypes.WorkloadWithID) (pkg.VMIface, error) { + network := stubs.NewNetworkerStub(p.zbus) + + // TODO: if we use `ygg` as a network name. this will conflict + // if the user has a network that is called `ygg`. + tapName := wl.ID.Unique("ygg") + iface, err := network.SetupYggTap(ctx, tapName) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device") + } + + out := pkg.VMIface{ + Tap: iface.Name, + MAC: iface.HW.String(), + IPs: []net.IPNet{ + iface.IP, + }, + Routes: []pkg.Route{ + { + Net: net.IPNet{ + IP: net.ParseIP("200::"), + Mask: net.CIDRMask(7, 128), + }, + Gateway: iface.Gateway.IP, + }, + }, + PublicIPv4: false, + PublicIPv6: false, + } + + return out, nil +} + +func (p *Manager) newPrivNetworkInterface(ctx context.Context, dl gridtypes.Deployment, wl *gridtypes.WorkloadWithID, inf zos.MachineInterface) (pkg.VMIface, error) { + network := stubs.NewNetworkerStub(p.zbus) + netID := zos.NetworkID(dl.TwinID, inf.Network) + + subnet, err := network.GetSubnet(ctx, netID) + if err != nil { + return pkg.VMIface{}, errors.Wrapf(err, "could not get network resource subnet") + } + + inf.IP = inf.IP.To4() + if inf.IP == nil { + return pkg.VMIface{}, fmt.Errorf("invalid IPv4 supplied to wg interface") + } + + if !subnet.Contains(inf.IP) { + return pkg.VMIface{}, fmt.Errorf("IP %s is not part of local nr subnet %s", inf.IP.String(), subnet.String()) + } + + // always the .1/24 ip is reserved + if inf.IP[3] == 1 { + return pkg.VMIface{}, fmt.Errorf("ip %s is reserved", inf.IP.String()) + } + + privNet, err := network.GetNet(ctx, netID) + if err != nil { + return pkg.VMIface{}, errors.Wrapf(err, "could not get network range") + } + + addrCIDR := net.IPNet{ + IP: inf.IP, + Mask: subnet.Mask, + } + + gw4, gw6, err := network.GetDefaultGwIP(ctx, netID) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not get network resource default gateway") + } + + privIP6, err := network.GetIPv6From4(ctx, netID, inf.IP) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not convert private ipv4 to ipv6") + } + + tapName := wl.ID.Unique(string(inf.Network)) + iface, err := network.SetupPrivTap(ctx, netID, tapName) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device") + } + + // the mac address uses the global workload id + // this needs to be the same as how we get it in the actual IP reservation + mac := ifaceutil.HardwareAddrFromInputBytes([]byte(tapName)) + + out := pkg.VMIface{ + Tap: iface, + MAC: mac.String(), + IPs: []net.IPNet{ + addrCIDR, privIP6, + }, + Routes: []pkg.Route{ + {Net: privNet, Gateway: gw4}, + {Net: networkResourceNet, Gateway: gw4}, + }, + IP4DefaultGateway: net.IP(gw4), + IP6DefaultGateway: gw6, + PublicIPv4: false, + PublicIPv6: false, + NetID: netID, + } + + return 
out, nil +} + +func (p *Manager) newPubNetworkInterface(ctx context.Context, deployment gridtypes.Deployment, cfg ZMachine) (pkg.VMIface, error) { + network := stubs.NewNetworkerStub(p.zbus) + ipWl, err := deployment.Get(cfg.Network.PublicIP) + if err != nil { + return pkg.VMIface{}, err + } + + tapName := ipWl.ID.Unique("pub") + + config, err := pubip.GetPubIPConfig(ipWl) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not get public ip config") + } + + pubIface, err := network.SetupPubTap(ctx, tapName) + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "could not set up tap device for public network") + } + + // the mac address uses the global workload id + // this needs to be the same as how we get it in the actual IP reservation + mac := ifaceutil.HardwareAddrFromInputBytes([]byte(tapName)) + + // pubic ip config can has + // - reserved public ipv4 + // - public ipv6 + // - both + // in all cases we have ipv6 it's handed out via slaac, so we don't need + // to set the IP on the interface. We need to configure it ONLY for ipv4 + // hence: + var ips []net.IPNet + var gw4 net.IP + var gw6 net.IP + + if !config.IP.Nil() { + ips = append(ips, config.IP.IPNet) + gw4 = config.Gateway + } + + if !config.IPv6.Nil() { + ips = append(ips, config.IPv6.IPNet) + gw6, err = network.GetPublicIPV6Gateway(ctx) + log.Debug().IPAddr("gw", gw6).Msg("found gateway for ipv6") + if err != nil { + return pkg.VMIface{}, errors.Wrap(err, "failed to get the default gateway for ipv6") + } + } + + return pkg.VMIface{ + Tap: pubIface, + MAC: mac.String(), // mac so we always get the same IPv6 from slaac + IPs: ips, + IP4DefaultGateway: gw4, + IP6DefaultGateway: gw6, + PublicIPv4: config.HasIPv4(), + PublicIPv6: config.HasIPv6(), + }, nil +} + +// FListInfo virtual machine details +type FListInfo struct { + ImagePath string + KernelPath string + InitrdPath string +} + +func (t *FListInfo) IsContainer() bool { + return len(t.ImagePath) == 0 +} + +func getFlistInfo(flistPath string) (flist FListInfo, err error) { + files := map[string]*string{ + "/image.raw": &flist.ImagePath, + "/boot/vmlinuz": &flist.KernelPath, + "/boot/initrd.img": &flist.InitrdPath, + } + + for rel, ptr := range files { + path := filepath.Join(flistPath, rel) + // this path can be a symlink so we need to make sure + // the symlink is pointing to only files inside the + // flist. + + // but we need to validate + stat, err := os.Stat(path) + if os.IsNotExist(err) { + continue + } else if err != nil { + return flist, errors.Wrapf(err, "couldn't stat %s", rel) + } + + if stat.IsDir() { + return flist, fmt.Errorf("path '%s' cannot be a directory", rel) + } + mod := stat.Mode() + switch mod.Type() { + case 0: + // regular file, do nothing + case os.ModeSymlink: + // this is a symlink. 
we + // need to make sure it's a safe link + // to a location inside the flist + link, err := os.Readlink(path) + if err != nil { + return flist, errors.Wrapf(err, "failed to read link at '%s", rel) + } + // now the link if joined with path, (and cleaned) need to also point + // to somewhere under flistPath + abs := filepath.Clean(filepath.Join(flistPath, link)) + if !strings.HasPrefix(abs, flistPath) { + return flist, fmt.Errorf("path '%s' points to invalid location", rel) + } + default: + return flist, fmt.Errorf("path '%s' is of invalid type: %s", rel, mod.Type().String()) + } + + // set the value + *ptr = path + } + + return flist, nil +} + +type startup struct { + Entries map[string]entry `toml:"startup"` +} + +type entry struct { + Name string + Args args +} + +type args struct { + Name string + Dir string + Args []string + Env map[string]string +} + +func (e entry) Entrypoint() string { + if e.Name == "core.system" || + e.Name == "core.base" && e.Args.Name != "" { + var buf strings.Builder + + buf.WriteString(e.Args.Name) + for _, arg := range e.Args.Args { + buf.WriteRune(' ') + arg = strings.Replace(arg, "\"", "\\\"", -1) + buf.WriteRune('"') + buf.WriteString(arg) + buf.WriteRune('"') + } + + return buf.String() + } + + return "" +} + +func (e entry) WorkingDir() string { + return e.Args.Dir +} + +func (e entry) Envs() map[string]string { + return e.Args.Env +} + +// This code is backward compatible with flist .startup.toml file +// where the flist can define an Entrypoint and some initial environment +// variables. this is used *with* the container configuration like this +// - if no zmachine entry point is defined, use the one from .startup.toml +// - if envs are defined in flist, merge with the env variables from the +func fListStartup(data *zos.ZMachine, path string) error { + f, err := os.Open(path) + if os.IsNotExist(err) { + return nil + } else if err != nil { + return errors.Wrapf(err, "failed to load startup file '%s'", path) + } + + defer f.Close() + + log.Info().Msg("startup file found") + startup := startup{} + if _, err := toml.NewDecoder(f).Decode(&startup); err != nil { + return err + } + + entry, ok := startup.Entries["entry"] + if !ok { + return nil + } + + data.Env = mergeEnvs(entry.Envs(), data.Env) + + if data.Entrypoint == "" && entry.Entrypoint() != "" { + data.Entrypoint = entry.Entrypoint() + } + return nil +} + +// mergeEnvs new into base +func mergeEnvs(base, new map[string]string) map[string]string { + if len(base) == 0 { + return new + } + + for k, v := range new { + base[k] = v + } + + return base +} diff --git a/pkg/primitives/vm/vm.go b/pkg/primitives/vm/vm.go new file mode 100644 index 00000000..8d9cd496 --- /dev/null +++ b/pkg/primitives/vm/vm.go @@ -0,0 +1,378 @@ +package vm + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "os" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +const ( + cloudContainerFlist = "https://hub.grid.tf/tf-autobuilder/cloud-container-9dba60e.flist" + cloudContainerName = "cloud-container" +) + +// ZMachine type +type ZMachine = zos.ZMachine + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Initializer = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) 
*Manager { + return &Manager{zbus} +} + +func (m *Manager) Initialize(ctx context.Context) error { + return m.initGPUs() +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return p.virtualMachineProvisionImpl(ctx, wl) +} + +func (p *Manager) vmMounts(ctx context.Context, deployment *gridtypes.Deployment, mounts []zos.MachineMount, format bool, vm *pkg.VM) error { + for _, mount := range mounts { + wl, err := deployment.Get(mount.Name) + if err != nil { + return errors.Wrapf(err, "failed to get mount '%s' workload", mount.Name) + } + if wl.Result.State != gridtypes.StateOk { + return fmt.Errorf("invalid disk '%s' state", mount.Name) + } + switch wl.Type { + case zos.ZMountType: + if err := p.mountDisk(ctx, wl, mount, format, vm); err != nil { + return err + } + case zos.QuantumSafeFSType: + if err := p.mountQsfs(wl, mount, vm); err != nil { + return err + } + case zos.VolumeType: + if err := p.mountVolume(ctx, wl, mount, vm); err != nil { + return err + } + default: + return fmt.Errorf("expecting a reservation of type '%s' or '%s' for disk '%s'", zos.ZMountType, zos.QuantumSafeFSType, mount.Name) + } + } + return nil +} + +func (p *Manager) mountDisk(ctx context.Context, wl *gridtypes.WorkloadWithID, mount zos.MachineMount, format bool, vm *pkg.VM) error { + storage := stubs.NewStorageModuleStub(p.zbus) + + info, err := storage.DiskLookup(ctx, wl.ID.String()) + if err != nil { + return errors.Wrapf(err, "failed to inspect disk '%s'", mount.Name) + } + + if format { + if err := storage.DiskFormat(ctx, wl.ID.String()); err != nil { + return errors.Wrap(err, "failed to prepare mount") + } + } + + vm.Disks = append(vm.Disks, pkg.VMDisk{Path: info.Path, Target: mount.Mountpoint}) + + return nil +} + +func (p *Manager) mountVolume(ctx context.Context, wl *gridtypes.WorkloadWithID, mount zos.MachineMount, vm *pkg.VM) error { + storage := stubs.NewStorageModuleStub(p.zbus) + + volume, err := storage.VolumeLookup(ctx, wl.ID.String()) + if err != nil { + return fmt.Errorf("failed to lookup volume %q: %w", wl.ID.String(), err) + } + + vm.Shared = append(vm.Shared, pkg.SharedDir{ID: wl.Name.String(), Path: volume.Path, Target: mount.Mountpoint}) + return nil +} + +func (p *Manager) mountQsfs(wl *gridtypes.WorkloadWithID, mount zos.MachineMount, vm *pkg.VM) error { + var info zos.QuatumSafeFSResult + if err := wl.Result.Unmarshal(&info); err != nil { + return fmt.Errorf("invalid qsfs result '%s': %w", mount.Name, err) + } + + vm.Shared = append(vm.Shared, pkg.SharedDir{ID: wl.Name.String(), Path: info.Path, Target: mount.Mountpoint}) + return nil +} + +func (p *Manager) virtualMachineProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (result zos.ZMachineResult, err error) { + var ( + network = stubs.NewNetworkerStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + vm = stubs.NewVMModuleStub(p.zbus) + + config ZMachine + ) + if vm.Exists(ctx, wl.ID.String()) { + return result, provision.ErrNoActionNeeded + } + + if err := json.Unmarshal(wl.Data, &config); err != nil { + return result, errors.Wrap(err, "failed to decode reservation schema") + } + + if len(config.GPU) != 0 && !provision.IsRentedNode(ctx) { + // you cannot use GPU unless this is a rented node + return result, fmt.Errorf("usage of GPU is not allowed unless node is rented") + } + + machine := pkg.VM{ + Name: wl.ID.String(), + CPU: config.ComputeCapacity.CPU, + Memory: config.ComputeCapacity.Memory, + Entrypoint: config.Entrypoint, + KernelArgs: pkg.KernelArgs{}, + } + + 
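A sketch of the .startup.toml shape that the fListStartup helper in utils.go above understands, and the entrypoint string derived from it. The struct layout mirrors the startup/entry/args types in this patch; the sample file contents and the local entrypoint helper are illustrative only.

package main

import (
	"fmt"
	"strings"

	"github.com/BurntSushi/toml"
)

type startup struct {
	Entries map[string]entry `toml:"startup"`
}

type entry struct {
	Name string
	Args args
}

type args struct {
	Name string
	Dir  string
	Args []string
	Env  map[string]string
}

// entrypoint mirrors entry.Entrypoint() above: only core.system (or core.base
// with a non-empty args name) entries become a command line, arguments quoted.
func entrypoint(e entry) string {
	if e.Name != "core.system" && !(e.Name == "core.base" && e.Args.Name != "") {
		return ""
	}
	var buf strings.Builder
	buf.WriteString(e.Args.Name)
	for _, arg := range e.Args.Args {
		buf.WriteString(` "` + strings.ReplaceAll(arg, `"`, `\"`) + `"`)
	}
	return buf.String()
}

const sample = `
[startup.entry]
name = "core.system"

[startup.entry.args]
name = "/usr/bin/myapp"
args = ["--listen", ":8080"]

[startup.entry.args.env]
APP_ENV = "production"
`

func main() {
	var s startup
	if _, err := toml.Decode(sample, &s); err != nil {
		panic(err)
	}
	e := s.Entries["entry"]
	fmt.Println("entrypoint:", entrypoint(e)) // /usr/bin/myapp "--listen" ":8080"
	fmt.Println("env:", e.Args.Env)           // map[APP_ENV:production]
}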
// expand GPUs + devices, err := p.expandGPUs(config.GPU) + if err != nil { + return result, errors.Wrap(err, "failed to prepare requested gpu device(s)") + } + + for _, device := range devices { + machine.Devices = append(machine.Devices, device.Slot) + } + + // the config is validated by the engine. we now only support only one + // private network + if len(config.Network.Interfaces) != 1 { + return result, fmt.Errorf("only one private network is support") + } + netConfig := config.Network.Interfaces[0] + + // check if public ipv4 is supported, should this be requested + if !config.Network.PublicIP.IsEmpty() && !network.PublicIPv4Support(ctx) { + return result, errors.New("public ipv4 is requested, but not supported on this node") + } + + result.ID = wl.ID.String() + result.IP = netConfig.IP.String() + + deployment, err := provision.GetDeployment(ctx) + if err != nil { + return result, errors.Wrap(err, "failed to get deployment") + } + networkInfo := pkg.VMNetworkInfo{ + Nameservers: []net.IP{net.ParseIP("8.8.8.8"), net.ParseIP("1.1.1.1"), net.ParseIP("2001:4860:4860::8888")}, + } + + var ifs []string + var pubIf string + + defer func() { + if err != nil { + for _, nic := range ifs { + _ = network.RemoveTap(ctx, nic) + } + if pubIf != "" { + _ = network.DisconnectPubTap(ctx, pubIf) + } + } + }() + + for _, nic := range config.Network.Interfaces { + inf, err := p.newPrivNetworkInterface(ctx, deployment, wl, nic) + if err != nil { + return result, err + } + ifs = append(ifs, wl.ID.Unique(string(nic.Network))) + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + } + + if !config.Network.PublicIP.IsEmpty() { + // some public access is required, we need to add a public + // interface to the machine with the right config. + inf, err := p.newPubNetworkInterface(ctx, deployment, config) + if err != nil { + return result, err + } + + ipWl, _ := deployment.Get(config.Network.PublicIP) + pubIf = ipWl.ID.Unique("pub") + ifs = append(ifs, pubIf) + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + } + + if config.Network.Planetary { + inf, err := p.newYggNetworkInterface(ctx, wl) + if err != nil { + return result, err + } + ifs = append(ifs, wl.ID.Unique("ygg")) + + log.Debug().Msgf("Planetary: %+v", inf) + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + result.PlanetaryIP = inf.IPs[0].IP.String() + } + + if config.Network.Mycelium != nil { + inf, err := p.newMyceliumNetworkInterface(ctx, deployment, wl, config.Network.Mycelium) + if err != nil { + return result, err + } + ifs = append(ifs, wl.ID.Unique("mycelium")) + networkInfo.Ifaces = append(networkInfo.Ifaces, inf) + result.MyceliumIP = inf.IPs[0].IP.String() + } + // - mount flist RO + mnt, err := flist.Mount(ctx, wl.ID.String(), config.FList, pkg.ReadOnlyMountOptions) + if err != nil { + return result, errors.Wrapf(err, "failed to mount flist: %s", wl.ID.String()) + } + + var imageInfo FListInfo + // - detect type (container or VM) + imageInfo, err = getFlistInfo(mnt) + if err != nil { + return result, err + } + + log.Debug().Msgf("detected flist type: %+v", imageInfo) + + // mount cloud-container flist (or reuse) which has kernel, initrd and also firmware + hash, err := flist.FlistHash(ctx, cloudContainerFlist) + if err != nil { + return zos.ZMachineResult{}, errors.Wrap(err, "failed to get cloud-container flist hash") + } + + // if the name changes (because flist changed, a new mount will be created) + name := fmt.Sprintf("%s:%s", cloudContainerName, hash) + // now mount cloud image also + cloudImage, err := flist.Mount(ctx, 
name, cloudContainerFlist, pkg.ReadOnlyMountOptions) + if err != nil { + return result, errors.Wrap(err, "failed to mount cloud container base image") + } + + if imageInfo.IsContainer() { + if err = p.prepContainer(ctx, cloudImage, imageInfo, &machine, &config, &deployment, wl); err != nil { + return result, err + } + } else { + if err = p.prepVirtualMachine(ctx, cloudImage, imageInfo, &machine, &config, &deployment, wl); err != nil { + return result, err + } + } + + // - Attach mounts + // - boot + machine.Network = networkInfo + machine.Environment = config.Env + machine.Hostname = wl.Name.String() + + machineInfo, err := vm.Run(ctx, machine) + if err != nil { + // attempt to delete the vm, should the process still be lingering + log.Error().Err(err).Msg("cleaning up vm deployment duo to an error") + _ = vm.Delete(ctx, wl.ID.String()) + } + result.ConsoleURL = machineInfo.ConsoleURL + return result, err +} + +func (p *Manager) copyFile(srcPath string, destPath string, permissions os.FileMode) error { + src, err := os.Open(srcPath) + if err != nil { + return errors.Wrapf(err, "Coludn't find %s on the node", srcPath) + } + defer src.Close() + dest, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, permissions) + if err != nil { + return errors.Wrapf(err, "Coludn't create %s file", destPath) + } + defer dest.Close() + _, err = io.Copy(dest, src) + if err != nil { + return errors.Wrapf(err, "Couldn't copy to %s", destPath) + } + return nil +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + var ( + flist = stubs.NewFlisterStub(p.zbus) + network = stubs.NewNetworkerStub(p.zbus) + vm = stubs.NewVMModuleStub(p.zbus) + storage = stubs.NewStorageModuleStub(p.zbus) + + cfg ZMachine + ) + + if err := json.Unmarshal(wl.Data, &cfg); err != nil { + return errors.Wrap(err, "failed to decode reservation schema") + } + + if _, err := vm.Inspect(ctx, wl.ID.String()); err == nil { + if err := vm.Delete(ctx, wl.ID.String()); err != nil { + return errors.Wrapf(err, "failed to delete vm %s", wl.ID) + } + } + + if err := flist.Unmount(ctx, wl.ID.String()); err != nil { + log.Error().Err(err).Msg("failed to unmount machine flist") + } + + volName := fmt.Sprintf("rootfs:%s", wl.ID.String()) + if err := storage.VolumeDelete(ctx, volName); err != nil { + log.Error().Err(err).Str("name", volName).Msg("failed to delete rootfs volume") + } + + for _, inf := range cfg.Network.Interfaces { + tapName := wl.ID.Unique(string(inf.Network)) + + if err := network.RemoveTap(ctx, tapName); err != nil { + return errors.Wrap(err, "could not clean up tap device") + } + } + + if cfg.Network.Planetary { + var tapName string + if cfg.Network.Mycelium == nil { + // yggdrasil network + tapName = wl.ID.Unique("ygg") + } else { + tapName = wl.ID.Unique("mycelium") + } + + if err := network.RemoveTap(ctx, tapName); err != nil { + return errors.Wrap(err, "could not clean up tap device") + } + } + + if len(cfg.Network.PublicIP) > 0 { + // TODO: we need to make sure workload status reflects the actual status by the engine + // this is not the case anymore. 
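A small sketch of the error-scoped cleanup idiom used in virtualMachineProvisionImpl above, where taps and the public tap are removed in a deferred block only when the function returns an error. All names below are illustrative stand-ins.

package main

import (
	"errors"
	"fmt"
)

func provisionNetworking(fail bool) (err error) {
	var created []string

	// the deferred block sees the named return value, so it only runs the
	// rollback when the function is exiting with an error
	defer func() {
		if err == nil {
			return
		}
		for _, name := range created {
			fmt.Println("removing", name)
		}
	}()

	for _, tap := range []string{"priv0", "pub"} {
		created = append(created, tap)
		fmt.Println("created", tap)
	}

	if fail {
		return errors.New("boot failed")
	}
	return nil
}

func main() {
	_ = provisionNetworking(false) // keeps the taps
	_ = provisionNetworking(true)  // rolls them back
}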
+ ipWl, err := provision.GetWorkload(ctx, cfg.Network.PublicIP) + if err != nil { + return err + } + ifName := ipWl.ID.Unique("pub") + if err := network.RemovePubTap(ctx, ifName); err != nil { + return errors.Wrap(err, "could not clean up public tap device") + } + } + + return nil +} diff --git a/pkg/primitives/volume/volume.go b/pkg/primitives/volume/volume.go new file mode 100644 index 00000000..0c660b9f --- /dev/null +++ b/pkg/primitives/volume/volume.go @@ -0,0 +1,105 @@ +package volume + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +type ( + Volume = zos.Volume + VolumeResult = zos.VolumeResult +) + +type Manager struct { + client zbus.Client +} + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) +) + +func NewManager(client zbus.Client) Manager { + return Manager{client: client} +} + +func (m Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + storage := stubs.NewStorageModuleStub(m.client) + + var volume Volume + if err := json.Unmarshal(wl.Data, &volume); err != nil { + return nil, fmt.Errorf("failed to parse workload data as volume: %w", err) + } + volumeName := wl.ID.String() + + exists, err := storage.VolumeExists(ctx, volumeName) + if err != nil { + return nil, fmt.Errorf("failed to lookup volume with name %q: %w", volumeName, err) + } else if exists { + return VolumeResult{ID: volumeName}, provision.ErrNoActionNeeded + } + + vol, err := storage.VolumeCreate(ctx, volumeName, volume.Size) + if err != nil { + return nil, fmt.Errorf("failed to create new volume with name %q: %w", volumeName, err) + } + return VolumeResult{ID: vol.Name}, nil +} + +func (m Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + storage := stubs.NewStorageModuleStub(m.client) + + var volume Volume + if err := json.Unmarshal(wl.Data, &volume); err != nil { + return fmt.Errorf("failed to parse workload data as volume: %w", err) + } + + volumeName := wl.ID.String() + + exists, err := storage.VolumeExists(ctx, volumeName) + if err != nil { + return fmt.Errorf("failed to lookup volume %q: %w", volumeName, err) + } else if !exists { + return fmt.Errorf("no volume with name %q found: %w", volumeName, err) + } + + return storage.VolumeDelete(ctx, volumeName) +} + +func (m Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + storage := stubs.NewStorageModuleStub(m.client) + + var volume Volume + if err := json.Unmarshal(wl.Data, &volume); err != nil { + return nil, fmt.Errorf("failed to parse workload data as volume: %w", err) + } + volumeName := wl.ID.String() + + exists, err := storage.VolumeExists(ctx, volumeName) + if err != nil { + return nil, fmt.Errorf("failed to lookup volume %q: %w", volumeName, err) + } else if !exists { + return nil, fmt.Errorf("no volume with name %q found: %w", volumeName, err) + } + + vol, err := storage.VolumeLookup(ctx, volumeName) + if err != nil { + return nil, fmt.Errorf("failed to lookup volume %q: %w", volumeName, err) + } + + if volume.Size < vol.Usage.Size { + return nil, fmt.Errorf("cannot shrink volume to be less than provisioned space. 
old: %d, requested: %d", vol.Usage.Size, volume.Size) + } + + if err := storage.VolumeUpdate(ctx, volumeName, volume.Size); err != nil { + return nil, fmt.Errorf("failed to update volume %q: %w", volumeName, err) + } + return VolumeResult{ID: volumeName}, nil +} diff --git a/pkg/primitives/zdb/zdb.go b/pkg/primitives/zdb/zdb.go new file mode 100644 index 00000000..26717ab7 --- /dev/null +++ b/pkg/primitives/zdb/zdb.go @@ -0,0 +1,1118 @@ +package zdb + +import ( + "context" + "encoding/json" + "fmt" + "net" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/cenkalti/backoff/v3" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/kernel" + "github.com/threefoldtech/zosbase/pkg/zdb" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zosbase/pkg" + nwmod "github.com/threefoldtech/zosbase/pkg/network" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +const ( + // https://hub.grid.tf/api/flist/tf-autobuilder/threefoldtech-0-db-development.flist/light + // To get the latest symlink pointer + zdbFlistURL = "https://hub.grid.tf/tf-autobuilder/threefoldtech-0-db-release-v2.0.8-55737c9202.flist" + zdbContainerNS = "zdb" + zdbContainerDataMnt = "/zdb" + zdbPort = 9900 +) + +var uuidRegex = regexp.MustCompile(`([0-9a-f]{8})\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}`) + +// ZDB types +type ZDB = zos.ZDB + +type tZDBContainer pkg.Container + +type safeError struct { + error +} + +func newSafeError(err error) error { + if err == nil { + return nil + } + return &safeError{err} +} + +func (se *safeError) Unwrap() error { + return se.error +} + +func (se *safeError) Error() string { + return uuidRegex.ReplaceAllString(se.error.Error(), `$1-***`) +} + +func (z *tZDBContainer) DataMount() (string, error) { + for _, mnt := range z.Mounts { + if mnt.Target == zdbContainerDataMnt { + return mnt.Source, nil + } + } + + return "", fmt.Errorf("container '%s' does not have a valid data mount", z.Name) +} + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) + _ provision.Initializer = (*Manager)(nil) + _ provision.Pauser = (*Manager)(nil) +) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + res, err := p.zdbProvisionImpl(ctx, wl) + return res, newSafeError(err) +} + +func (p *Manager) zdbListContainers(ctx context.Context) (map[pkg.ContainerID]tZDBContainer, error) { + contmod := stubs.NewContainerModuleStub(p.zbus) + + containerIDs, err := contmod.List(ctx, zdbContainerNS) + if err != nil { + return nil, errors.Wrap(err, "failed to list running containers") + } + + // for each container we try to find a free space to jam in this new zdb namespace + // request + m := make(map[pkg.ContainerID]tZDBContainer) + + for _, containerID := range containerIDs { + container, err := contmod.Inspect(ctx, zdbContainerNS, containerID) + if err != nil { + log.Error().Err(err).Str("container-id", string(containerID)).Msg("failed to inspect zdb container") + continue + } + cont := tZDBContainer(container) + + if _, err = cont.DataMount(); err != nil { + log.Error().Err(err).Msg("failed to get data directory of zdb container") + continue + } + m[containerID] = cont + } + + return m, 
nil +} + +func (p *Manager) zdbProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (zos.ZDBResult, error) { + var containerIPs []net.IP + var ( + // contmod = stubs.NewContainerModuleStub(p.zbus) + storage = stubs.NewStorageModuleStub(p.zbus) + nsID = wl.ID.String() + config ZDB + ) + if err := json.Unmarshal(wl.Data, &config); err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to decode reservation schema") + } + + // for each container we try to find a free space to jam in this new zdb namespace + // request + containers, err := p.zdbListContainers(ctx) + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to list container data volumes") + } + + var candidates []tZDBContainer + // check if namespace already exist + for id, container := range containers { + dataPath, _ := container.DataMount() // the error should not happen + + index := zdb.NewIndex(dataPath) + nss, err := index.Namespaces() + if err != nil { + // skip or error + log.Error().Err(err).Str("container-id", string(id)).Msg("couldn't list namespaces") + continue + } + + for _, ns := range nss { + if ns.Name != nsID { + continue + } + if kernel.GetParams().IsLight() { + containerIPs, err = p.waitZDBIPsLight(ctx, container.Network.Namespace, container.CreatedAt) + } else { + containerIPs, err = p.waitZDBIPs(ctx, container.Network.Namespace, container.CreatedAt) + } + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to find IP address on zdb0 interface") + } + + return zos.ZDBResult{ + Namespace: nsID, + IPs: ipsToString(containerIPs), + Port: zdbPort, + }, nil + } + + device, err := storage.DeviceLookup(ctx, container.Name) + if err != nil { + log.Error().Err(err).Str("container", string(id)).Msg("failed to inspect zdb device") + continue + } + + if uint64(device.Usage.Used)+uint64(config.Size) <= uint64(device.Usage.Size) { + candidates = append(candidates, container) + } + } + + var cont tZDBContainer + if len(candidates) > 0 { + cont = candidates[0] + } else { + // allocate new disk + device, err := storage.DeviceAllocate(ctx, config.Size) + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "couldn't allocate device to satisfy namespace size") + } + cont, err = p.ensureZdbContainer(ctx, device) + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to start zdb container") + } + } + if kernel.GetParams().IsLight() { + containerIPs, err = p.waitZDBIPsLight(ctx, cont.Network.Namespace, cont.CreatedAt) + } else { + containerIPs, err = p.waitZDBIPs(ctx, cont.Network.Namespace, cont.CreatedAt) + } + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to find IP address on zdb0 interface") + } + + log.Warn().Msgf("ip for zdb containers %s", containerIPs) + // this call will actually configure the namespace in zdb and set the password + if err := p.createZDBNamespace(pkg.ContainerID(cont.Name), nsID, config); err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to create zdb namespace") + } + + return zos.ZDBResult{ + Namespace: nsID, + IPs: ipsToString(containerIPs), + Port: zdbPort, + }, nil +} + +func (p *Manager) ensureZdbContainer(ctx context.Context, device pkg.Device) (tZDBContainer, error) { + container := stubs.NewContainerModuleStub(p.zbus) + name := pkg.ContainerID(device.ID) + + cont, err := container.Inspect(ctx, zdbContainerNS, name) + if err != nil && strings.Contains(err.Error(), "not found") { + // container not found, create one + var err error + if kernel.GetParams().IsLight() { + err = 
p.createZdbContainerLight(ctx, device) + } else { + err = p.createZdbContainer(ctx, device) + } + if err != nil { + return tZDBContainer(cont), err + } + cont, err = container.Inspect(ctx, zdbContainerNS, name) + if err != nil { + return tZDBContainer{}, err + } + } else if err != nil { + // other error + return tZDBContainer{}, err + } + + return tZDBContainer(cont), nil +} + +func (p *Manager) zdbRootFS(ctx context.Context) (string, error) { + flist := stubs.NewFlisterStub(p.zbus) + var err error + var rootFS string + + hash, err := flist.FlistHash(ctx, zdbFlistURL) + if err != nil { + return "", errors.Wrap(err, "failed to get flist hash") + } + + rootFS, err = flist.Mount(ctx, hash, zdbFlistURL, pkg.MountOptions{ + Limit: 10 * gridtypes.Megabyte, + ReadOnly: false, + }) + if err != nil { + return "", errors.Wrap(err, "failed to mount zdb flist") + } + + return rootFS, nil +} + +func (p *Manager) createZdbContainer(ctx context.Context, device pkg.Device) error { + var ( + name = pkg.ContainerID(device.ID) + cont = stubs.NewContainerModuleStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + volumePath = device.Path + network = stubs.NewNetworkerStub(p.zbus) + + slog = log.With().Str("containerID", string(name)).Logger() + ) + + slog.Debug().Str("flist", zdbFlistURL).Msg("mounting flist") + + rootFS, err := p.zdbRootFS(ctx) + if err != nil { + return err + } + + cleanup := func() { + if err := cont.Delete(ctx, zdbContainerNS, name); err != nil { + slog.Error().Err(err).Msg("failed to delete 0-db container") + } + + if err := flist.Unmount(ctx, string(name)); err != nil { + slog.Error().Err(err).Str("path", rootFS).Msgf("failed to unmount") + } + } + + // create the network namespace and macvlan for the 0-db container + netNsName, err := network.EnsureZDBPrepare(ctx, device.ID) + if err != nil { + if err := flist.Unmount(ctx, string(name)); err != nil { + slog.Error().Err(err).Str("path", rootFS).Msgf("failed to unmount") + } + + return errors.Wrap(err, "failed to prepare zdb network") + } + + socketDir := socketDir(name) + if err := os.MkdirAll(socketDir, 0550); err != nil && !os.IsExist(err) { + return errors.Wrapf(err, "failed to create directory: %s", socketDir) + } + + cl := zdbConnection(name) + if err := cl.Connect(); err == nil { + // it seems there is a running container already + cl.Close() + return nil + } + + // make sure the file does not exist otherwise we get the address already in use error + if err := os.Remove(socketFile(name)); err != nil && !os.IsNotExist(err) { + return err + } + + cmd := fmt.Sprintf("/bin/zdb --protect --admin '%s' --data /zdb/data --index /zdb/index --listen :: --port %d --socket /socket/zdb.sock --dualnet", device.ID, zdbPort) + + err = p.zdbRun(ctx, string(name), rootFS, cmd, netNsName, volumePath, socketDir) + if err != nil { + cleanup() + return errors.Wrap(err, "failed to create container") + } + + cl = zdbConnection(name) + defer cl.Close() + + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = time.Second * 20 + bo.MaxElapsedTime = time.Minute * 2 + + if err := backoff.RetryNotify(cl.Connect, bo, func(err error, d time.Duration) { + log.Debug().Err(err).Str("duration", d.String()).Msg("waiting for zdb to start") + }); err != nil { + cleanup() + return errors.Wrapf(err, "failed to establish connection to zdb") + } + + return nil +} + +// dataMigration will make sure that we delete any data files from v1. This is +// hardly a data migration but at this early stage it's fine since there is still +// no real data loads live on the grid. 
All v2 zdbs, will be safe. +func (p *Manager) dataMigration(ctx context.Context, volume string) { + v1, _ := zdb.IsZDBVersion1(ctx, volume) + // TODO: what if there is an error? + if !v1 { + // it's eather a new volume, or already on version 2 + // so nothing to do. + return + } + + for _, sub := range []string{"data", "index"} { + if err := os.RemoveAll(filepath.Join(volume, sub)); err != nil { + log.Error().Err(err).Msg("failed to delete obsolete data directories") + } + } +} + +func (p *Manager) createZdbContainerLight(ctx context.Context, device pkg.Device) error { + var ( + name = pkg.ContainerID(device.ID) + cont = stubs.NewContainerModuleStub(p.zbus) + flist = stubs.NewFlisterStub(p.zbus) + volumePath = device.Path + network = stubs.NewNetworkerLightStub(p.zbus) + + slog = log.With().Str("containerID", string(name)).Logger() + ) + + slog.Debug().Str("flist", zdbFlistURL).Msg("mounting flist") + + rootFS, err := p.zdbRootFS(ctx) + if err != nil { + return err + } + + cleanup := func() { + if err := cont.Delete(ctx, zdbContainerNS, name); err != nil { + slog.Error().Err(err).Msg("failed to delete 0-db container") + } + + if err := flist.Unmount(ctx, string(name)); err != nil { + slog.Error().Err(err).Str("path", rootFS).Msgf("failed to unmount") + } + } + + // create the network namespace and macvlan for the 0-db container + netNsName, err := network.AttachZDB(ctx, device.ID) + if err != nil { + if err := flist.Unmount(ctx, string(name)); err != nil { + slog.Error().Err(err).Str("path", rootFS).Msgf("failed to unmount") + } + + return errors.Wrap(err, "failed to prepare zdb network") + } + + socketDir := socketDir(name) + if err := os.MkdirAll(socketDir, 0550); err != nil && !os.IsExist(err) { + return errors.Wrapf(err, "failed to create directory: %s", socketDir) + } + + cl := zdbConnection(name) + if err := cl.Connect(); err == nil { + // it seems there is a running container already + cl.Close() + return nil + } + + // make sure the file does not exist otherwise we get the address already in use error + if err := os.Remove(socketFile(name)); err != nil && !os.IsNotExist(err) { + return err + } + + cmd := fmt.Sprintf("/bin/zdb --protect --admin '%s' --data /zdb/data --index /zdb/index --listen :: --port %d --socket /socket/zdb.sock --dualnet", device.ID, zdbPort) + + err = p.zdbRun(ctx, string(name), rootFS, cmd, netNsName, volumePath, socketDir) + if err != nil { + cleanup() + return errors.Wrap(err, "failed to create container") + } + + cl = zdbConnection(name) + defer cl.Close() + + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = time.Second * 20 + bo.MaxElapsedTime = time.Minute * 2 + + if err := backoff.RetryNotify(cl.Connect, bo, func(err error, d time.Duration) { + log.Debug().Err(err).Str("duration", d.String()).Msg("waiting for zdb to start") + }); err != nil { + cleanup() + return errors.Wrapf(err, "failed to establish connection to zdb") + } + + return nil +} + +func (p *Manager) zdbRun(ctx context.Context, name string, rootfs string, cmd string, netns string, volumepath string, socketdir string) error { + cont := stubs.NewContainerModuleStub(p.zbus) + + // we do data migration here because this is called + // on new zdb starts, or updating the runtime. 
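For reference, a sketch of the 0-db invocation composed in createZdbContainer/createZdbContainerLight above: the admin password is the device ID and the port is the fixed zdbPort (9900). The device ID below is a made-up example.

package main

import "fmt"

const zdbPort = 9900

func main() {
	deviceID := "pool-abc123" // hypothetical device/pool ID
	cmd := fmt.Sprintf(
		"/bin/zdb --protect --admin '%s' --data /zdb/data --index /zdb/index --listen :: --port %d --socket /socket/zdb.sock --dualnet",
		deviceID, zdbPort,
	)
	fmt.Println(cmd)
}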
+ p.dataMigration(ctx, volumepath) + + conf := pkg.Container{ + Name: name, + RootFS: rootfs, + Entrypoint: cmd, + Interactive: false, + Network: pkg.NetworkInfo{Namespace: netns}, + Mounts: []pkg.MountInfo{ + { + Source: volumepath, + Target: zdbContainerDataMnt, + }, + { + Source: socketdir, + Target: "/socket", + }, + }, + } + + _, err := cont.Run( + ctx, + zdbContainerNS, + conf, + ) + + return err +} + +func (p *Manager) waitZDBIPs(ctx context.Context, namespace string, created time.Time) ([]net.IP, error) { + // TODO: this method need to be abstracted, since it's now depends on the knewledge + // of the networking daemon internal (interfaces names) + // may be at least just get all ips from all interfaces inside the namespace + // will be a slightly better solution + var ( + network = stubs.NewNetworkerStub(p.zbus) + containerIPs []net.IP + ) + + log.Debug().Time("created-at", created).Str("namespace", namespace).Msg("checking zdb container ips") + getIP := func() error { + // some older setups that might still be running has PubIface set to zdb0 not eth0 + // so we need to make sure that this we also try this older name + ips, _, err := network.Addrs(ctx, nwmod.PubIface, namespace) + if err != nil { + var err2 error + ips, _, err2 = network.Addrs(ctx, "zdb0", namespace) + if err2 != nil { + log.Debug().Err(err).Msg("no public ip found, waiting") + return err + } + } + + yggIps, _, err := network.Addrs(ctx, nwmod.ZDBYggIface, namespace) + if err != nil { + return err + } + ips = append(ips, yggIps...) + + MyceliumIps, _, err := network.Addrs(ctx, nwmod.ZDBMyceliumIface, namespace) + if err != nil { + return err + } + ips = append(ips, MyceliumIps...) + + var ( + public = false + ygg = false + mycelium = false + ) + containerIPs = containerIPs[:0] + + for _, ip := range ips { + if isPublic(ip) && !isYgg(ip) && !isMycelium(ip) { + log.Warn().IPAddr("ip", ip).Msg("0-db container public ip found") + public = true + containerIPs = append(containerIPs, ip) + } + if isYgg(ip) { + log.Warn().IPAddr("ip", ip).Msg("0-db container ygg ip found") + ygg = true + containerIPs = append(containerIPs, ip) + } + if isMycelium(ip) { + log.Warn().IPAddr("ip", ip).Msg("0-db container mycelium ip found") + mycelium = true + containerIPs = append(containerIPs, ip) + } + } + + log.Warn().Msgf("public %v ygg: %v mycelium: %v", public, ygg, mycelium) + if public && ygg && mycelium || time.Since(created) > 2*time.Minute { + // if we have all ips detected or if the container is older than 2 minutes + // so it's safe we assume ips are final + return nil + } + return fmt.Errorf("waiting for more addresses") + } + + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = time.Minute + bo.MaxElapsedTime = time.Minute * 2 + + if err := backoff.RetryNotify(getIP, bo, func(err error, d time.Duration) { + log.Debug().Err(err).Str("duration", d.String()).Msg("failed to get zdb public IP") + }); err != nil && len(containerIPs) == 0 { + return nil, errors.Wrapf(err, "failed to get an IP for interface") + } + + return containerIPs, nil +} + +func (p *Manager) waitZDBIPsLight(ctx context.Context, namespace string, created time.Time) ([]net.IP, error) { + // TODO: is there a need for retrying anymore?? 
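A minimal sketch of the wait pattern used by waitZDBIPs above: retry a probe with exponential backoff (same MaxInterval/MaxElapsedTime settings), and only fail if nothing was found at all. The probe below is a stand-in; the real code inspects interface addresses inside the container namespace.

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/cenkalti/backoff/v3"
)

func main() {
	attempts := 0
	probe := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("waiting for more addresses")
		}
		return nil
	}

	bo := backoff.NewExponentialBackOff()
	bo.MaxInterval = time.Minute
	bo.MaxElapsedTime = 2 * time.Minute

	err := backoff.RetryNotify(probe, bo, func(err error, d time.Duration) {
		fmt.Printf("probe failed (%v), retrying in %s\n", err, d)
	})
	fmt.Println("done:", err, "after", attempts, "attempts")
}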
+ var ( + network = stubs.NewNetworkerLightStub(p.zbus) + containerIPs []net.IP + ) + + log.Debug().Time("created-at", created).Str("namespace", namespace).Msg("checking zdb container ips") + getIP := func() error { + ips, err := network.ZDBIPs(ctx, namespace) + if err != nil { + return err + } + for _, ip := range ips { + containerIPs = append(containerIPs, ip) + } + return nil + } + + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = time.Minute + bo.MaxElapsedTime = time.Minute * 2 + + if err := backoff.RetryNotify(getIP, bo, func(err error, d time.Duration) { + log.Debug().Err(err).Str("duration", d.String()).Msg("failed to get zdb public IP") + }); err != nil && len(containerIPs) == 0 { + return nil, errors.Wrapf(err, "failed to get an IP for interface") + } + + return containerIPs, nil +} + +func (p *Manager) createZDBNamespace(containerID pkg.ContainerID, nsID string, config ZDB) error { + zdbCl := zdbConnection(containerID) + defer zdbCl.Close() + if err := zdbCl.Connect(); err != nil { + return errors.Wrapf(err, "failed to connect to 0-db: %s", containerID) + } + + exists, err := zdbCl.Exist(nsID) + if err != nil { + return err + } + if !exists { + if err := zdbCl.CreateNamespace(nsID); err != nil { + return errors.Wrapf(err, "failed to create namespace in 0-db: %s", containerID) + } + + if err := zdbCl.NamespaceSetMode(nsID, string(config.Mode)); err != nil { + return errors.Wrap(err, "failed to set namespace mode") + } + } + + if config.Password != "" { + if err := zdbCl.NamespaceSetPassword(nsID, config.Password); err != nil { + return errors.Wrapf(err, "failed to set password namespace %s in 0-db: %s", nsID, containerID) + } + } + + if err := zdbCl.NamespaceSetPublic(nsID, config.Public); err != nil { + return errors.Wrapf(err, "failed to make namespace %s public in 0-db: %s", nsID, containerID) + } + + if err := zdbCl.NamespaceSetSize(nsID, uint64(config.Size)); err != nil { + return errors.Wrapf(err, "failed to set size on namespace %s in 0-db: %s", nsID, containerID) + } + + return nil +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + return newSafeError(p.zdbDecommissionImpl(ctx, wl)) +} + +func (p *Manager) zdbDecommissionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + containers, err := p.zdbListContainers(ctx) + if err != nil { + return errors.Wrap(err, "failed to list running zdbs") + } + + for id, container := range containers { + con := zdbConnection(id) + if err := con.Connect(); err == nil { + defer con.Close() + if ok, _ := con.Exist(wl.ID.String()); ok { + if err := con.DeleteNamespace(wl.ID.String()); err != nil { + return errors.Wrap(err, "failed to delete namespace") + } + } + + continue + } + // if we failed to connect, may be check the data directory if the namespace exists + data, err := container.DataMount() + if err != nil { + log.Error().Err(err).Str("container-id", string(id)).Msg("failed to get container data directory") + return err + } + + idx := zdb.NewIndex(data) + if !idx.Exists(wl.ID.String()) { + continue + } + + return idx.Delete(wl.ID.String()) + } + + return nil +} + +func (p *Manager) findContainer(ctx context.Context, name string) (zdb.Client, error) { + containers, err := p.zdbListContainers(ctx) + if err != nil { + return nil, errors.Wrap(err, "failed to list running zdbs") + } + + for id := range containers { + cl := zdbConnection(id) + if err := cl.Connect(); err != nil { + log.Error().Err(err).Str("id", string(id)).Msg("failed to connect to zdb instance") + 
continue + } + + if ok, _ := cl.Exist(name); ok { + return cl, nil + } + + _ = cl.Close() + } + + return nil, os.ErrNotExist +} + +func (p *Manager) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + cl, err := p.findContainer(ctx, wl.ID.String()) + if os.IsNotExist(err) { + return nil + } else if err != nil { + return provision.UnChanged(err) + } + + defer cl.Close() + + if err := cl.NamespaceSetLock(wl.ID.String(), true); err != nil { + return provision.UnChanged(errors.Wrap(err, "failed to set namespace locking")) + } + + return provision.Paused() +} + +func (p *Manager) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + cl, err := p.findContainer(ctx, wl.ID.String()) + if os.IsNotExist(err) { + return nil + } else if err != nil { + return provision.UnChanged(err) + } + + defer cl.Close() + + if err := cl.NamespaceSetLock(wl.ID.String(), false); err != nil { + return provision.UnChanged(errors.Wrap(err, "failed to set namespace locking")) + } + + return provision.Ok() +} + +func (p *Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + res, err := p.zdbUpdateImpl(ctx, wl) + return res, newSafeError(err) +} + +func (p *Manager) zdbUpdateImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (result zos.ZDBResult, err error) { + current, err := provision.GetWorkload(ctx, wl.Name) + if err != nil { + // this should not happen but we need to have the check anyway + return result, errors.Wrapf(err, "no zdb workload with name '%s' is deployed", wl.Name.String()) + } + + var old ZDB + if err := json.Unmarshal(current.Data, &old); err != nil { + return result, errors.Wrap(err, "failed to decode reservation schema") + } + + var new ZDB + if err := json.Unmarshal(wl.Data, &new); err != nil { + return result, errors.Wrap(err, "failed to decode reservation schema") + } + + if new.Mode != old.Mode { + return result, provision.UnChanged(fmt.Errorf("cannot change namespace mode")) + } + + if new.Size < old.Size { + // technically shrinking a namespace is possible, but the problem is if you set it + // to a size smaller than actual size used by the namespace, zdb will just not accept + // writes anymore but NOT change the effective size. + // While this makes sense fro ZDB. it will not make zos able to calculate the namespace + // consumption because it will only count the size used by the workload from the user + // data but not actual size on disk. hence shrinking is not allowed. + return result, provision.UnChanged(fmt.Errorf("cannot shrink zdb namespace")) + } + + if new.Size == old.Size && new.Password == old.Password && new.Public == old.Public { + // unnecessary update. 
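A standalone sketch of the update rules zdbUpdateImpl enforces (no mode change, no shrinking, and a no-op when nothing changed). Field names mirror the ZDB workload used in this patch; the decision helper itself is hypothetical.

package main

import (
	"errors"
	"fmt"
)

type zdbConfig struct {
	Size     uint64
	Mode     string
	Password string
	Public   bool
}

var errNoAction = errors.New("no action needed")

func planUpdate(old, updated zdbConfig) error {
	if updated.Mode != old.Mode {
		return errors.New("cannot change namespace mode")
	}
	if updated.Size < old.Size {
		// zdb would keep the effective on-disk size, breaking capacity accounting
		return errors.New("cannot shrink zdb namespace")
	}
	if updated == old {
		return errNoAction
	}
	return nil // apply size/password/public changes
}

func main() {
	old := zdbConfig{Size: 10, Mode: "seq", Public: true}
	fmt.Println(planUpdate(old, zdbConfig{Size: 20, Mode: "seq", Public: true})) // <nil>
	fmt.Println(planUpdate(old, old))                                            // no action needed
	fmt.Println(planUpdate(old, zdbConfig{Size: 5, Mode: "seq", Public: true}))  // cannot shrink
}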
+ return result, provision.ErrNoActionNeeded + } + containers, err := p.zdbListContainers(ctx) + if err != nil { + return result, provision.UnChanged(errors.Wrap(err, "failed to list running zdbs")) + } + + name := wl.ID.String() + for id, container := range containers { + con := zdbConnection(id) + if err := con.Connect(); err != nil { + log.Error().Err(err).Str("id", string(id)).Msg("failed t connect to zdb") + continue + } + defer con.Close() + if ok, _ := con.Exist(name); !ok { + continue + } + + // we try the size first because this can fail easy + if new.Size != old.Size { + free, reserved, err := reservedSpace(con) + if err != nil { + return result, provision.UnChanged(errors.Wrap(err, "failed to calculate free/reserved space from zdb")) + } + + if reserved+new.Size-old.Size > free { + return result, provision.UnChanged(fmt.Errorf("no enough free space to support new size")) + } + + if err := con.NamespaceSetSize(name, uint64(new.Size)); err != nil { + return result, provision.UnChanged(errors.Wrap(err, "failed to set new zdb namespace size")) + } + } + + // this is kinda proplamatic because what if we changed the size for example, but failed + // to setup the password + if new.Password != old.Password { + if err := con.NamespaceSetPassword(name, new.Password); err != nil { + return result, provision.UnChanged(errors.Wrap(err, "failed to set new password")) + } + } + if new.Public != old.Public { + if err := con.NamespaceSetPublic(name, new.Public); err != nil { + return result, provision.UnChanged(errors.Wrap(err, "failed to set public flag")) + } + } + var containerIPs []net.IP + if kernel.GetParams().IsLight() { + containerIPs, err = p.waitZDBIPsLight(ctx, container.Network.Namespace, container.CreatedAt) + } else { + containerIPs, err = p.waitZDBIPs(ctx, container.Network.Namespace, container.CreatedAt) + } + if err != nil { + return zos.ZDBResult{}, errors.Wrap(err, "failed to find IP address on zdb0 interface") + } + + return zos.ZDBResult{ + Namespace: name, + IPs: ipsToString(containerIPs), + Port: zdbPort, + }, nil + + } + + return result, fmt.Errorf("namespace not found") +} + +func socketDir(containerID pkg.ContainerID) string { + return fmt.Sprintf("/var/run/zdb_%s", containerID) +} + +func socketFile(containerID pkg.ContainerID) string { + return filepath.Join(socketDir(containerID), "zdb.sock") +} + +// we declare this method as a variable so we can +// mock it in testing. 
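A sketch of the package-variable mocking pattern the comment above refers to: because zdbConnection is declared as a var, tests can swap in a fake client. The interface and fakes below are simplified stand-ins, not the real zdb.Client.

package main

import "fmt"

type client interface {
	Connect() error
	Exist(ns string) (bool, error)
	Close() error
}

// production hook, swapped out in tests
var connect = func(addr string) client { return realClient{addr} }

type realClient struct{ addr string }

func (realClient) Connect() error              { return fmt.Errorf("no zdb reachable in this sketch") }
func (realClient) Exist(string) (bool, error)  { return false, nil }
func (realClient) Close() error                { return nil }

type fakeClient struct{}

func (fakeClient) Connect() error             { return nil }
func (fakeClient) Exist(string) (bool, error) { return true, nil }
func (fakeClient) Close() error               { return nil }

func namespaceExists(addr, ns string) bool {
	cl := connect(addr)
	if cl.Connect() != nil {
		return false
	}
	defer cl.Close()
	ok, _ := cl.Exist(ns)
	return ok
}

func main() {
	fmt.Println(namespaceExists("unix:///socket/zdb.sock", "ns")) // false, real client cannot connect here

	connect = func(string) client { return fakeClient{} } // what a test would do
	fmt.Println(namespaceExists("unix:///socket/zdb.sock", "ns")) // true
}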
+var zdbConnection = func(id pkg.ContainerID) zdb.Client { + socket := fmt.Sprintf("unix://%s@%s", string(id), socketFile(id)) + return zdb.New(socket) +} + +func reservedSpace(con zdb.Client) (free, reserved gridtypes.Unit, err error) { + nss, err := con.Namespaces() + if err != nil { + return 0, 0, err + } + + for _, id := range nss { + ns, err := con.Namespace(id) + if err != nil { + return 0, 0, err + } + + if ns.Name == "default" { + free = ns.DataDiskFreespace + } + + reserved += ns.DataLimit + } + + return +} + +func ipsToString(ips []net.IP) []string { + result := make([]string, 0, len(ips)) + for _, ip := range ips { + result = append(result, ip.String()) + } + + return result +} + +// isPublic check if ip is a IPv6 public address +func isPublic(ip net.IP) bool { + if ip.To4() != nil { + return false + } + + return !(ip.IsLoopback() || + ip.IsLinkLocalUnicast() || + ip.IsLinkLocalMulticast() || + ip.IsInterfaceLocalMulticast()) +} + +// isPublic check if ip is a part of the yggdrasil 200::/7 range +var yggNet = net.IPNet{ + IP: net.ParseIP("200::"), + Mask: net.CIDRMask(7, 128), +} + +var myceliumNet = net.IPNet{ + IP: net.ParseIP("400::"), + Mask: net.CIDRMask(7, 128), +} + +func isYgg(ip net.IP) bool { + return yggNet.Contains(ip) +} + +func isMycelium(ip net.IP) bool { + return myceliumNet.Contains(ip) +} + +// InitializeZDB makes sure all required zdbs are running +func (p *Manager) Initialize(ctx context.Context) error { + if kernel.GetParams().IsLight() { + return p.initializeLight(ctx) + } + return p.initialize(ctx) +} + +// InitializeZDB makes sure all required zdbs are running +func (p *Manager) initialize(ctx context.Context) error { + var ( + storage = stubs.NewStorageModuleStub(p.zbus) + contmod = stubs.NewContainerModuleStub(p.zbus) + network = stubs.NewNetworkerStub(p.zbus) + flistmod = stubs.NewFlisterStub(p.zbus) + ) + // fetching extected hash + log.Debug().Msg("fetching flist hash") + expected, err := flistmod.FlistHash(ctx, zdbFlistURL) + if err != nil { + log.Error().Err(err).Msg("could not load expected flist hash") + return err + } + + devices, err := storage.Devices(ctx) + if err != nil { + return errors.Wrap(err, "failed to list allocated zdb devices") + } + + log.Debug().Msgf("alloced devices for zdb: %+v", devices) + poolNames := make(map[string]pkg.Device) + for _, device := range devices { + poolNames[device.ID] = device + } + + containers, err := contmod.List(ctx, zdbContainerNS) + if err != nil { + return errors.Wrap(err, "failed to list running zdb container") + } + + for _, container := range containers { + if err := p.upgradeRuntime(ctx, expected, container); err != nil { + log.Error().Err(err).Msg("failed to upgrade running zdb container") + } + + log.Debug().Str("container", string(container)).Msg("enusreing zdb network setup") + _, err := network.EnsureZDBPrepare(ctx, poolNames[string(container)].ID) + if err != nil { + log.Error().Err(err).Msg("failed to prepare zdb network") + } + + delete(poolNames, string(container)) + } + + log.Debug().Msg("running zdb network setup migration") + if err := network.MigrateZdbMacvlanToVeth(ctx); err != nil { + log.Error().Err(err).Send() + } + + // do we still have allocated pools that does not have associated zdbs. 
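A runnable sketch of how the address-classification helpers above sort a 0-db container's addresses. The CIDR ranges come from the patch (yggdrasil 200::/7, mycelium 400::/7); the sample addresses are illustrative.

package main

import (
	"fmt"
	"net"
)

var (
	yggNet      = net.IPNet{IP: net.ParseIP("200::"), Mask: net.CIDRMask(7, 128)}
	myceliumNet = net.IPNet{IP: net.ParseIP("400::"), Mask: net.CIDRMask(7, 128)}
)

func classify(ip net.IP) string {
	switch {
	case myceliumNet.Contains(ip):
		return "mycelium"
	case yggNet.Contains(ip):
		return "yggdrasil"
	case ip.To4() == nil && !ip.IsLoopback() && !ip.IsLinkLocalUnicast():
		return "public ipv6"
	default:
		return "other"
	}
}

func main() {
	for _, s := range []string{"2a02:1802::10", "200:aaaa::1", "400:bbbb::1", "fe80::1"} {
		fmt.Printf("%-16s -> %s\n", s, classify(net.ParseIP(s)))
	}
}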
+ for _, device := range poolNames { + log.Debug().Str("device", device.Path).Msg("starting zdb") + if _, err := p.ensureZdbContainer(ctx, device); err != nil { + log.Error().Err(err).Str("pool", device.ID).Msg("failed to create zdb container associated with pool") + } + } + return nil +} + +// InitializeZDB makes sure all required zdbs are running +func (p *Manager) initializeLight(ctx context.Context) error { + var ( + storage = stubs.NewStorageModuleStub(p.zbus) + contmod = stubs.NewContainerModuleStub(p.zbus) + network = stubs.NewNetworkerLightStub(p.zbus) + flistmod = stubs.NewFlisterStub(p.zbus) + ) + // fetching extected hash + log.Debug().Msg("fetching flist hash") + expected, err := flistmod.FlistHash(ctx, zdbFlistURL) + if err != nil { + log.Error().Err(err).Msg("could not load expected flist hash") + return err + } + + devices, err := storage.Devices(ctx) + if err != nil { + return errors.Wrap(err, "failed to list allocated zdb devices") + } + + log.Debug().Msgf("alloced devices for zdb: %+v", devices) + poolNames := make(map[string]pkg.Device) + for _, device := range devices { + poolNames[device.ID] = device + } + + containers, err := contmod.List(ctx, zdbContainerNS) + if err != nil { + return errors.Wrap(err, "failed to list running zdb container") + } + + for _, container := range containers { + if err := p.upgradeRuntime(ctx, expected, container); err != nil { + log.Error().Err(err).Msg("failed to upgrade running zdb container") + } + + log.Debug().Str("container", string(container)).Msg("enusreing zdb network setup") + _, err := network.AttachZDB(ctx, poolNames[string(container)].ID) + if err != nil { + log.Error().Err(err).Msg("failed to initialize zdb network") + } + + delete(poolNames, string(container)) + } + + // do we still have allocated pools that does not have associated zdbs. 
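A simplified sketch of the reconciliation step in initialize/initializeLight above: index the allocated devices by ID, drop every device that already has a running zdb container, then start a container for whatever is left. The types below are stand-ins for the real pkg.Device and container IDs.

package main

import "fmt"

type device struct{ ID, Path string }

func main() {
	devices := []device{{"dev-a", "/mnt/a"}, {"dev-b", "/mnt/b"}}
	running := []string{"dev-a"} // container names match device IDs

	pool := make(map[string]device, len(devices))
	for _, d := range devices {
		pool[d.ID] = d
	}
	for _, name := range running {
		delete(pool, name)
	}
	for _, d := range pool {
		fmt.Println("starting zdb for", d.Path) // ensureZdbContainer in the patch
	}
}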
+ for _, device := range poolNames { + log.Debug().Str("device", device.Path).Msg("starting zdb") + if _, err := p.ensureZdbContainer(ctx, device); err != nil { + log.Error().Err(err).Str("pool", device.ID).Msg("failed to create zdb container associated with pool") + } + } + return nil +} + +func (p *Manager) upgradeRuntime(ctx context.Context, expected string, container pkg.ContainerID) error { + var ( + flistmod = stubs.NewFlisterStub(p.zbus) + contmod = stubs.NewContainerModuleStub(p.zbus) + ) + continfo, err := contmod.Inspect(ctx, zdbContainerNS, container) + if err != nil { + return err + } + + hash, err := flistmod.HashFromRootPath(ctx, continfo.RootFS) + if err != nil { + return errors.Wrap(err, "could not find container running flist hash") + } + + log.Debug().Str("hash", hash).Msg("running container hash") + if hash == expected { + return nil + } + + log.Info().Str("id", string(container)).Msg("restarting container, update found") + + // extracting required informations + volumeid := continfo.Name // VolumeID is the Container Name + volumepath := "" // VolumePath is /data mount on the container + socketdir := "" // SocketDir is /socket on the container + zdbcmd := continfo.Entrypoint + netns := continfo.Network.Namespace + + log.Info().Str("id", volumeid).Str("path", volumepath).Msg("rebuild zdb container") + + for _, mnt := range continfo.Mounts { + if mnt.Target == zdbContainerDataMnt { + volumepath = mnt.Source + } + + if mnt.Target == "/socket" { + socketdir = mnt.Source + } + } + + if volumepath == "" { + return fmt.Errorf("could not grab container /data mountpoint") + } + + // stopping running zdb + + if err := contmod.Delete(ctx, zdbContainerNS, container); err != nil { + return errors.Wrap(err, "could not stop running zdb container") + } + + // cleanup old containers rootfs + if err = flistmod.Unmount(ctx, volumeid); err != nil { + log.Error().Err(err).Str("path", continfo.RootFS).Msgf("failed to unmount old zdb container") + } + + // restarting zdb + + // mount the new flist + rootfs, err := p.zdbRootFS(ctx) + if err != nil { + return errors.Wrap(err, "could not initialize zdb rootfs") + } + + // respawn the container + err = p.zdbRun(ctx, volumeid, rootfs, zdbcmd, netns, volumepath, socketdir) + if err != nil { + log.Error().Err(err).Msg("could not restart zdb container") + + if err = flistmod.Unmount(ctx, volumeid); err != nil { + log.Error().Err(err).Str("path", rootfs).Msgf("failed to unmount zdb container") + } + } + + return nil +} diff --git a/pkg/primitives/zlogs/zlogs.go b/pkg/primitives/zlogs/zlogs.go new file mode 100644 index 00000000..c9635ca5 --- /dev/null +++ b/pkg/primitives/zlogs/zlogs.go @@ -0,0 +1,73 @@ +package zlogs + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var _ provision.Manager = (*Manager)(nil) + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + var ( + vm = stubs.NewVMModuleStub(p.zbus) + network = stubs.NewNetworkerStub(p.zbus) + ) + + var cfg zos.ZLogs + if err := json.Unmarshal(wl.Data, &cfg); err != nil { + return nil, errors.Wrap(err, "failed to decode 
zlogs config") + } + + machine, err := provision.GetWorkload(ctx, cfg.ZMachine) + if err != nil || machine.Type != zos.ZMachineType { + return nil, errors.Wrapf(err, "no zmachine with name '%s'", cfg.ZMachine) + } + + if !machine.Result.State.IsOkay() { + return nil, errors.Wrapf(err, "machine state is not ok") + } + + var machineCfg zos.ZMachine + if err := json.Unmarshal(machine.Data, &machineCfg); err != nil { + return nil, errors.Wrap(err, "failed to decode zlogs config") + } + + var net gridtypes.Name + + if len(machineCfg.Network.Interfaces) > 0 { + net = machineCfg.Network.Interfaces[0].Network + } else { + return nil, fmt.Errorf("invalid zmachine network configuration") + } + + twin, _ := provision.GetDeploymentID(ctx) + + return nil, vm.StreamCreate(ctx, machine.ID.String(), pkg.Stream{ + ID: wl.ID.String(), + Namespace: network.Namespace(ctx, zos.NetworkID(twin, net)), + Output: cfg.Output, + }) +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vm := stubs.NewVMModuleStub(p.zbus) + + return vm.StreamDelete(ctx, wl.ID.String()) +} diff --git a/pkg/primitives/zmount/zmount.go b/pkg/primitives/zmount/zmount.go new file mode 100644 index 00000000..24796bdf --- /dev/null +++ b/pkg/primitives/zmount/zmount.go @@ -0,0 +1,131 @@ +package zmount + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zbus" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/stubs" +) + +var ( + _ provision.Manager = (*Manager)(nil) + _ provision.Updater = (*Manager)(nil) +) + +// ZMount defines a mount point +type ZMount = zos.ZMount + +// ZMountResult types +type ZMountResult = zos.ZMountResult + +type Manager struct { + zbus zbus.Client +} + +func NewManager(zbus zbus.Client) *Manager { + return &Manager{zbus} +} + +// VolumeProvision is entry point to provision a volume +func (p *Manager) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return p.volumeProvisionImpl(ctx, wl) +} + +func (p *Manager) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + vdisk := stubs.NewStorageModuleStub(p.zbus) + return vdisk.DiskDelete(ctx, wl.ID.String()) +} + +func (p *Manager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + return p.zMountUpdateImpl(ctx, wl) +} + +func (p *Manager) volumeProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (vol ZMountResult, err error) { + var config ZMount + if err := json.Unmarshal(wl.Data, &config); err != nil { + return ZMountResult{}, err + } + + vol.ID = wl.ID.String() + vdisk := stubs.NewStorageModuleStub(p.zbus) + if vdisk.DiskExists(ctx, vol.ID) { + return vol, nil + } + + _, err = vdisk.DiskCreate(ctx, vol.ID, config.Size) + + return vol, err +} + +// VolumeProvision is entry point to provision a volume +func (p *Manager) zMountUpdateImpl(ctx context.Context, wl *gridtypes.WorkloadWithID) (vol ZMountResult, err error) { + log.Debug().Msg("updating zmount") + current, err := provision.GetWorkload(ctx, wl.Name) + if err != nil { + // this should not happen but we need to have the check anyway + return vol, errors.Wrapf(err, "no zmount workload with name '%s' is deployed", wl.Name.String()) + } + + var old ZMount + if err := json.Unmarshal(current.Data, &old); err != nil { + return vol, errors.Wrap(err, "failed 
to decode reservation schema") + } + + var new ZMount + if err := json.Unmarshal(wl.Data, &new); err != nil { + return vol, errors.Wrap(err, "failed to decode reservation schema") + } + + if new.Size == old.Size { + return vol, provision.ErrNoActionNeeded + } else if new.Size < old.Size { + return vol, provision.UnChanged(fmt.Errorf("not safe to shrink a disk")) + } + + // now validate that disk is not being used right now + deployment, err := provision.GetDeployment(ctx) + if err != nil { + return vol, provision.UnChanged(errors.Wrap(err, "failed to get deployment")) + } + + vms := deployment.ByType(zos.ZMachineType) + log.Debug().Int("count", len(vms)).Msg("found zmachines in deployment") + for _, vm := range vms { + // vm not running, no need to check + if !vm.Result.State.IsOkay() { + continue + } + + var data zos.ZMachine + if err := json.Unmarshal(vm.Data, &data); err != nil { + return vol, provision.UnChanged(errors.Wrap(err, "failed to load vm information")) + } + + for _, mnt := range data.Mounts { + if mnt.Name == wl.Name { + return vol, provision.UnChanged(fmt.Errorf("disk is mounted, please delete the VM first")) + } + } + } + + log.Debug().Str("disk", wl.ID.String()).Msg("disk is not used, proceed with update") + vdisk := stubs.NewStorageModuleStub(p.zbus) + + // okay, so no vm is using this disk. time to try resize. + + // we know it's safe to resize the disk, it won't break it so we + // can be sure we can wrap the error into an unchanged error + vol.ID = wl.ID.String() + if _, err := vdisk.DiskResize(ctx, wl.ID.String(), new.Size); err != nil { + return vol, provision.UnChanged(err) + } + + return vol, nil +} diff --git a/pkg/provision.go b/pkg/provision.go new file mode 100644 index 00000000..f4d1b28b --- /dev/null +++ b/pkg/provision.go @@ -0,0 +1,62 @@ +package pkg + +//go:generate zbusc -module provision -version 0.0.1 -name provision -package stubs github.com/threefoldtech/zos4/pkg+Provision stubs/provision_stub.go +//go:generate zbusc -module provision -version 0.0.1 -name statistics -package stubs github.com/threefoldtech/zos4/pkg+Statistics stubs/statistics_stub.go + +import ( + "context" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +// Provision interface +type Provision interface { + DecommissionCached(id string, reason string) error + // GetWorkloadStatus: returns status, bool(true if workload exits otherwise it is false), error + GetWorkloadStatus(id string) (gridtypes.ResultState, bool, error) + CreateOrUpdate(twin uint32, deployment gridtypes.Deployment, update bool) error + Get(twin uint32, contractID uint64) (gridtypes.Deployment, error) + List(twin uint32) ([]gridtypes.Deployment, error) + Changes(twin uint32, contractID uint64) ([]gridtypes.Workload, error) + ListPublicIPs() ([]string, error) + ListPrivateIPs(twin uint32, network gridtypes.Name) ([]string, error) +} + +type Statistics interface { + ReservedStream(ctx context.Context) <-chan gridtypes.Capacity + Current() (gridtypes.Capacity, error) + Total() gridtypes.Capacity + Workloads() (int, error) + GetCounters() (Counters, error) + ListGPUs() ([]GPUInfo, error) +} + +type Counters struct { + // Total system capacity + Total gridtypes.Capacity `json:"total"` + // Used capacity this include user + system resources + Used gridtypes.Capacity `json:"used"` + // System resource reserved by zos + System gridtypes.Capacity `json:"system"` + // Users statistics by zos + Users UsersCounters `json:"users"` + // OpenConnecions number of open connections in the node + OpenConnecions int 
`json:"open_connections"` +} + +// UsersCounters the expected counters for deployments and workloads +type UsersCounters struct { + // Total deployments count + Deployments int `json:"deployments"` + // Total workloads count + Workloads int `json:"workloads"` + // Last deployment timestamp + LastDeploymentTimestamp gridtypes.Timestamp `json:"last_deployment_timestamp"` +} + +type GPUInfo struct { + ID string `json:"id"` + Vendor string `json:"vendor"` + Device string `json:"device"` + Contract uint64 `json:"contract"` +} diff --git a/pkg/provision/auth.go b/pkg/provision/auth.go new file mode 100644 index 00000000..6c3a0f8c --- /dev/null +++ b/pkg/provision/auth.go @@ -0,0 +1,87 @@ +package provision + +import ( + "context" + "crypto/ed25519" + "encoding/base64" + "fmt" + + lru "github.com/hashicorp/golang-lru" + "github.com/pkg/errors" + "github.com/threefoldtech/zos4/pkg/stubs" +) + +type registrarTwins struct { + registrarGateway *stubs.RegistrarGatewayStub + mem *lru.Cache +} + +// NewRegistrarTwins creates a users db that implements the provision.Users interface. +func NewRegistrarTwins(registrarGateway *stubs.RegistrarGatewayStub) (Twins, error) { + cache, err := lru.New(1024) + if err != nil { + return nil, err + } + + return ®istrarTwins{ + registrarGateway: registrarGateway, + mem: cache, + }, nil +} + +// GetKey gets twins public key +func (s *registrarTwins) GetKey(id uint32) ([]byte, error) { + if value, ok := s.mem.Get(id); ok { + return value.([]byte), nil + } + user, err := s.registrarGateway.GetTwin(context.Background(), uint64(id)) + if err != nil { + return nil, errors.Wrapf(err, "could not get user with id '%d'", id) + } + + key, err := base64.StdEncoding.DecodeString(user.PublicKey) + if err != nil { + return nil, errors.Wrapf(err, "could decode public key for user with id '%d'", id) + } + + s.mem.Add(id, key) + return key, nil +} + +type registrarAdmins struct { + twin uint32 + pk ed25519.PublicKey +} + +// NewRegistrarAdmins creates a twins db that implements the provision.Users interface. +// but it also make sure the user is an admin +func NewRegistrarAdmins(registrarGateway *stubs.RegistrarGatewayStub, farmID uint64) (Twins, error) { + farm, err := registrarGateway.GetFarm(context.Background(), farmID) + if err != nil { + return nil, errors.Wrap(err, "failed to get farm") + } + + twin, err := registrarGateway.GetTwin(context.Background(), farm.TwinID) + if err != nil { + return nil, err + } + + key, err := base64.StdEncoding.DecodeString(twin.PublicKey) + if err != nil { + return nil, errors.Wrapf(err, "could decode public key for twin with farm id '%d'", farmID) + } + + return ®istrarAdmins{ + twin: uint32(farm.TwinID), + pk: key, + }, nil +} + +// GetKey gets twin public key if twin is valid admin +func (s *registrarAdmins) GetKey(id uint32) ([]byte, error) { + if id != s.twin { + return nil, fmt.Errorf("twin with id '%d' is not an admin", id) + } + + return []byte(s.pk), nil +} diff --git a/pkg/provision/common/doc.go b/pkg/provision/common/doc.go new file mode 100644 index 00000000..ae0ffa62 --- /dev/null +++ b/pkg/provision/common/doc.go @@ -0,0 +1,3 @@ +// Package common hold logic that is used by both the provision and primitive package +// it purpose is mainly to avoid circular dependencies +package common diff --git a/pkg/provision/doc.go b/pkg/provision/doc.go new file mode 100644 index 00000000..3b51a21e --- /dev/null +++ b/pkg/provision/doc.go @@ -0,0 +1,4 @@ +// Package provision exposes the Engine type. 
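// A minimal illustrative sketch (editor's example, written from a caller's
// perspective): the Twins implementations in auth.go above hand back raw
// ed25519 public keys, so authenticating a signed payload from a twin is a key
// lookup plus ed25519.Verify. Uses crypto/ed25519, fmt, and the zos4 provision
// and stubs packages; the payload and signature inputs are assumed to come from
// the caller.
func verifyTwinSignature(registrarGateway *stubs.RegistrarGatewayStub, twinID uint32, payload, signature []byte) error {
	twins, err := provision.NewRegistrarTwins(registrarGateway)
	if err != nil {
		return err
	}

	key, err := twins.GetKey(twinID) // cached in the LRU after the first registrar lookup
	if err != nil {
		return err
	}

	if !ed25519.Verify(ed25519.PublicKey(key), payload, signature) {
		return fmt.Errorf("invalid signature for twin %d", twinID)
	}
	return nil
}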
+// Engine is a fully configurable type that can be used to +// implement custom provisioning of workloads +package provision diff --git a/pkg/provision/engine.go b/pkg/provision/engine.go new file mode 100644 index 00000000..a24862b2 --- /dev/null +++ b/pkg/provision/engine.go @@ -0,0 +1,1239 @@ +package provision + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "net/url" + "os" + "path/filepath" + "sort" + "time" + + "github.com/cenkalti/backoff/v3" + "github.com/hashicorp/go-retryablehttp" + "github.com/joncrlsn/dque" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" + zos4pkg "github.com/threefoldtech/zos4/pkg" + "github.com/threefoldtech/zos4/pkg/stubs" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/environment" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +// EngineOption interface +type EngineOption interface { + apply(e *NativeEngine) +} + +// WithTwins sets the user key getter on the +// engine +func WithTwins(g Twins) EngineOption { + return &withUserKeyGetter{g} +} + +// WithAdmins sets the admins key getter on the +// engine +func WithAdmins(g Twins) EngineOption { + return &withAdminsKeyGetter{g} +} + +// WithStartupOrder forces a specific startup order of types +// any type that is not listed in this list, will get started +// in an nondeterministic order +func WithStartupOrder(t ...gridtypes.WorkloadType) EngineOption { + return &withStartupOrder{t} +} + +// WithAPIGateway sets the API Gateway. If set it will +// be used by the engine to fetch (and validate) the deployment contract +// then contract with be available on the deployment context +func WithAPIGateway(node uint64, substrateGateway *stubs.RegistrarGatewayStub) EngineOption { + return &withAPIGateway{node, substrateGateway} +} + +// WithRerunAll if set forces the engine to re-run all reservations +// on engine start. +func WithRerunAll(t bool) EngineOption { + return &withRerunAll{t} +} + +type Callback func(twin uint32, contract uint64, delete bool) + +// WithCallback sets a callback that is called when a deployment is being Created, Updated, Or Deleted +// The handler then can use the id to get current "state" of the deployment from storage and +// take proper action. A callback must not block otherwise the engine operation will get blocked +func WithCallback(cb Callback) EngineOption { + return &withCallback{cb} +} + +type jobOperation int + +const ( + // opProvision installs a deployment + opProvision jobOperation = iota + // removes a deployment + opDeprovision + // deletes a deployment + opUpdate + // opProvisionNoValidation is used to reinstall + // a deployment on node reboot without validating + // against the chain again because 1) validation + // has already been done on first installation + // 2) hash is not granteed to match because of the + // order of the workloads doesn't have to match + // the one sent by the user + opProvisionNoValidation + // opPause, pauses a deployment + opPause + // opResume resumes a deployment + opResume + // servers default timeout + defaultHttpTimeout = 10 * time.Second +) + +// engineJob is a persisted job instance that is +// stored in a queue. the queue uses a GOB encoder +// so please make sure that edits to this struct is +// ONLY by adding new fields or deleting older fields +// but never rename or change the type of a field. 
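// A minimal illustrative sketch (editor's example, caller perspective): wiring
// a NativeEngine with the options above. The storage, provisioner, twins and
// registrar gateway values are assumed to be built by the caller (provisiond
// does this with the storage and primitives packages added in this patch), the
// root path is a placeholder, and the startup order shown is only an example;
// New's signature is the one defined later in this file.
func runEngine(ctx context.Context, store provision.Storage, prov provision.Provisioner, users, admins provision.Twins, nodeID uint64, gw *stubs.RegistrarGatewayStub) error {
	engine, err := provision.New(store, prov, "/var/cache/modules/provisiond",
		provision.WithTwins(users),
		provision.WithAdmins(admins),
		provision.WithAPIGateway(nodeID, gw),
		// mounts and volumes before machines, so disks exist when VMs boot
		provision.WithStartupOrder(zos.ZMountType, zos.VolumeType, zos.ZMachineType),
		provision.WithRerunAll(false),
		provision.WithCallback(func(twin uint32, contract uint64, delete bool) {
			log.Debug().Uint32("twin", twin).Uint64("contract", contract).Bool("delete", delete).Msg("deployment changed")
		}),
	)
	if err != nil {
		return err
	}

	// Run blocks, draining the persistent job queue until the process exits.
	return engine.Run(ctx)
}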
+type engineJob struct { + Op jobOperation + Target gridtypes.Deployment + Source *gridtypes.Deployment + Message string +} + +// NativeEngine is the core of this package +// The engine is responsible to manage provision and decomission of workloads on the system +type NativeEngine struct { + storage Storage + provisioner Provisioner + + queue *dque.DQue + + // options + // janitor Janitor + twins Twins + admins Twins + order []gridtypes.WorkloadType + typeIndex map[gridtypes.WorkloadType]int + rerunAll bool + // substrate specific attributes + nodeID uint64 + registrarGateway *stubs.RegistrarGatewayStub + callback Callback +} + +var ( + _ Engine = (*NativeEngine)(nil) + _ zos4pkg.Provision = (*NativeEngine)(nil) +) + +type withUserKeyGetter struct { + g Twins +} + +func (o *withUserKeyGetter) apply(e *NativeEngine) { + e.twins = o.g +} + +type withAdminsKeyGetter struct { + g Twins +} + +func (o *withAdminsKeyGetter) apply(e *NativeEngine) { + e.admins = o.g +} + +type withAPIGateway struct { + nodeID uint64 + substrateGateway *stubs.RegistrarGatewayStub +} + +func (o *withAPIGateway) apply(e *NativeEngine) { + e.nodeID = o.nodeID + e.registrarGateway = o.substrateGateway +} + +type withStartupOrder struct { + o []gridtypes.WorkloadType +} + +func (w *withStartupOrder) apply(e *NativeEngine) { + all := make(map[gridtypes.WorkloadType]struct{}) + for _, typ := range e.order { + all[typ] = struct{}{} + } + ordered := make([]gridtypes.WorkloadType, 0, len(all)) + for _, typ := range w.o { + if _, ok := all[typ]; !ok { + panic(fmt.Sprintf("type '%s' is not registered", typ)) + } + delete(all, typ) + ordered = append(ordered, typ) + e.typeIndex[typ] = len(ordered) + } + // now move everything else + for typ := range all { + ordered = append(ordered, typ) + e.typeIndex[typ] = len(ordered) + } + + e.order = ordered +} + +type withRerunAll struct { + t bool +} + +func (w *withRerunAll) apply(e *NativeEngine) { + e.rerunAll = w.t +} + +type withCallback struct { + cb Callback +} + +func (w *withCallback) apply(e *NativeEngine) { + e.callback = w.cb +} + +type nullKeyGetter struct{} + +func (n *nullKeyGetter) GetKey(id uint32) ([]byte, error) { + return nil, fmt.Errorf("null user key getter") +} + +type ( + engineKey struct{} + deploymentKey struct{} + deploymentValue struct { + twin uint32 + deployment uint64 + } +) + +type ( + contractKey struct{} + rentKey struct{} +) + +// GetEngine gets engine from context +func GetEngine(ctx context.Context) Engine { + return ctx.Value(engineKey{}).(Engine) +} + +// GetDeploymentID gets twin and deployment ID for current deployment +func GetDeploymentID(ctx context.Context) (twin uint32, deployment uint64) { + values := ctx.Value(deploymentKey{}).(deploymentValue) + return values.twin, values.deployment +} + +// GetDeployment gets a copy of the current deployment with latest state +func GetDeployment(ctx context.Context) (gridtypes.Deployment, error) { + // we store the pointer on the context so changed to deployment object + // actually reflect into the value. + engine := GetEngine(ctx) + twin, deployment := GetDeploymentID(ctx) + + // BUT we always return a copy so caller of GetDeployment can NOT manipulate + // other attributed on the object. 
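// A minimal illustrative sketch (editor's example): inside a primitive Manager
// the per-deployment context carries the helpers defined here. A hypothetical
// manager that resolves another workload from the same deployment, the same
// pattern the zlogs manager above follows, could read:
func resolveWorkload(ctx context.Context, name gridtypes.Name) (gridtypes.WorkloadWithID, error) {
	twin, contract := provision.GetDeploymentID(ctx)
	log.Debug().Uint32("twin", twin).Uint64("contract", contract).Stringer("name", name).Msg("resolving workload")

	wl, err := provision.GetWorkload(ctx, name)
	if err != nil {
		return gridtypes.WorkloadWithID{}, errors.Wrapf(err, "no workload '%s' in deployment %d", name, contract)
	}

	if !wl.Result.State.IsOkay() {
		return gridtypes.WorkloadWithID{}, fmt.Errorf("workload '%s' is not in an ok state", name)
	}
	return wl, nil
}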
+ return engine.Storage().Get(twin, deployment) +} + +// GetWorkload get the last state of the workload for the current deployment +func GetWorkload(ctx context.Context, name gridtypes.Name) (gridtypes.WorkloadWithID, error) { + // we store the pointer on the context so changed to deployment object + // actually reflect into the value. + engine := GetEngine(ctx) + twin, deployment := GetDeploymentID(ctx) + + // BUT we always return a copy so caller of GetDeployment can NOT manipulate + // other attributed on the object. + wl, err := engine.Storage().Current(twin, deployment, name) + if err != nil { + return gridtypes.WorkloadWithID{}, err + } + + return gridtypes.WorkloadWithID{ + Workload: &wl, + ID: gridtypes.NewUncheckedWorkloadID(twin, deployment, name), + }, nil +} + +func withDeployment(ctx context.Context, twin uint32, deployment uint64) context.Context { + return context.WithValue(ctx, deploymentKey{}, deploymentValue{twin, deployment}) +} + +// GetContract of deployment. panics if engine has no substrate set. +func GetContract(ctx context.Context) substrate.NodeContract { + return ctx.Value(contractKey{}).(substrate.NodeContract) +} + +func withContract(ctx context.Context, contract substrate.NodeContract) context.Context { + return context.WithValue(ctx, contractKey{}, contract) +} + +// IsRentedNode returns true if current node is rented +func IsRentedNode(ctx context.Context) bool { + v := ctx.Value(rentKey{}) + if v == nil { + return false + } + + return v.(bool) +} + +// sets node rented flag on the ctx +func withRented(ctx context.Context, rent bool) context.Context { + return context.WithValue(ctx, rentKey{}, rent) +} + +// New creates a new engine. Once started, the engine +// will continue processing all reservations from the reservation source +// and try to apply them. +// the default implementation is a single threaded worker. so it process +// one reservation at a time. On error, the engine will log the error. and +// continue to next reservation. +func New(storage Storage, provisioner Provisioner, root string, opts ...EngineOption) (*NativeEngine, error) { + e := &NativeEngine{ + storage: storage, + provisioner: provisioner, + twins: &nullKeyGetter{}, + admins: &nullKeyGetter{}, + order: gridtypes.Types(), + typeIndex: make(map[gridtypes.WorkloadType]int), + } + + for _, opt := range opts { + opt.apply(e) + } + + if e.rerunAll { + os.RemoveAll(filepath.Join(root, "jobs")) + } + + queue, err := dque.NewOrOpen("jobs", root, 512, func() interface{} { return &engineJob{} }) + if err != nil { + // if this happens it means data types has been changed in that case we need + // to clean up the queue and start over. 
unfortunately any un applied changes + os.RemoveAll(filepath.Join(root, "jobs")) + return nil, errors.Wrap(err, "failed to create job queue") + } + + e.queue = queue + return e, nil +} + +// Storage returns +func (e *NativeEngine) Storage() Storage { + return e.storage +} + +// Twins returns twins db +func (e *NativeEngine) Twins() Twins { + return e.twins +} + +// Admins returns admins db +func (e *NativeEngine) Admins() Twins { + return e.admins +} + +// Provision workload +func (e *NativeEngine) Provision(ctx context.Context, deployment gridtypes.Deployment) error { + if deployment.Version != 0 { + return errors.Wrap(ErrInvalidVersion, "expected version to be 0 on deployment creation") + } + + if err := e.storage.Create(deployment); err != nil { + return err + } + + job := engineJob{ + Target: deployment, + Op: opProvision, + } + + return e.queue.Enqueue(&job) +} + +// Pause deployment +func (e *NativeEngine) Pause(ctx context.Context, twin uint32, id uint64) error { + deployment, err := e.storage.Get(twin, id) + if err != nil { + return err + } + + log.Info(). + Uint32("twin", deployment.TwinID). + Uint64("contract", deployment.ContractID). + Msg("schedule for pausing") + + job := engineJob{ + Target: deployment, + Op: opPause, + } + + return e.queue.Enqueue(&job) +} + +// Resume deployment +func (e *NativeEngine) Resume(ctx context.Context, twin uint32, id uint64) error { + deployment, err := e.storage.Get(twin, id) + if err != nil { + return err + } + + log.Info(). + Uint32("twin", deployment.TwinID). + Uint64("contract", deployment.ContractID). + Msg("schedule for resuming") + + job := engineJob{ + Target: deployment, + Op: opResume, + } + + return e.queue.Enqueue(&job) +} + +// Deprovision workload +func (e *NativeEngine) Deprovision(ctx context.Context, twin uint32, id uint64, reason string) error { + deployment, err := e.storage.Get(twin, id) + if err != nil { + return err + } + + log.Info(). + Uint32("twin", deployment.TwinID). + Uint64("contract", deployment.ContractID). + Str("reason", reason). + Msg("schedule for deprovision") + + job := engineJob{ + Target: deployment, + Op: opDeprovision, + Message: reason, + } + + return e.queue.Enqueue(&job) +} + +// Update workloads +func (e *NativeEngine) Update(ctx context.Context, update gridtypes.Deployment) error { + deployment, err := e.storage.Get(update.TwinID, update.ContractID) + if err != nil { + return err + } + + // this will just calculate the update + // steps we run it here as a sort of validation + // that this update is acceptable. 
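// A minimal illustrative sketch (editor's example, caller perspective): the
// methods above only enqueue work; the Run loop applies it. A full lifecycle
// driven against the Engine interface, assuming the deployment value has
// already been built and signed by the caller:
func exampleLifecycle(ctx context.Context, engine provision.Engine, dl gridtypes.Deployment) error {
	// initial creation must come in at version 0 (enforced by Provision above)
	if err := engine.Provision(ctx, dl); err != nil {
		return err
	}

	// pause and resume are addressed by twin and contract id
	if err := engine.Pause(ctx, dl.TwinID, dl.ContractID); err != nil {
		return err
	}
	if err := engine.Resume(ctx, dl.TwinID, dl.ContractID); err != nil {
		return err
	}

	// finally tear the deployment down with a human readable reason
	return engine.Deprovision(ctx, dl.TwinID, dl.ContractID, "contract canceled")
}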
+ upgrades, err := deployment.Upgrade(&update) + if err != nil { + return errors.Wrap(ErrDeploymentUpgradeValidationError, err.Error()) + } + + for _, op := range upgrades { + if op.Op == gridtypes.OpUpdate { + if !e.provisioner.CanUpdate(ctx, op.WlID.Type) { + return errors.Wrapf( + ErrDeploymentUpgradeValidationError, + "workload '%s' does not support upgrade", + op.WlID.Type.String()) + } + } + } + + // fields to update in storage + fields := []Field{ + VersionField{update.Version}, + SignatureRequirementField{update.SignatureRequirement}, + } + + if deployment.Description != update.Description { + fields = append(fields, DescriptionField{update.Description}) + } + if deployment.Metadata != update.Metadata { + fields = append(fields, MetadataField{update.Metadata}) + } + // update deployment fields, workloads will then can get updated separately + if err := e.storage.Update(update.TwinID, update.ContractID, fields...); err != nil { + return errors.Wrap(err, "failed to update deployment data") + } + // all is okay we can push the job + job := engineJob{ + Op: opUpdate, + Target: update, + Source: &deployment, + } + + return e.queue.Enqueue(&job) +} + +// Run starts reader reservation from the Source and handle them +func (e *NativeEngine) Run(root context.Context) error { + defer e.queue.Close() + + root = context.WithValue(root, engineKey{}, e) + + if e.rerunAll { + if err := e.boot(root); err != nil { + log.Error().Err(err).Msg("error while setting up") + } + } + + for { + obj, err := e.queue.PeekBlock() + if err != nil { + log.Error().Err(err).Msg("failed to check job queue") + <-time.After(2 * time.Second) + continue + } + + job := obj.(*engineJob) + ctx := withDeployment(root, job.Target.TwinID, job.Target.ContractID) + l := log.With(). + Uint32("twin", job.Target.TwinID). + Uint64("contract", job.Target.ContractID). + Logger() + + // contract validation + // this should ONLY be done on provosion and update operation + if job.Op == opProvision || + job.Op == opUpdate || + job.Op == opProvisionNoValidation { + // otherwise, contract validation is needed + ctx, err = e.validate(ctx, &job.Target, job.Op == opProvisionNoValidation) + if err != nil { + l.Error().Err(err).Msg("contact validation fails") + // job.Target.SetError(err) + if err := e.storage.Error(job.Target.TwinID, job.Target.ContractID, err); err != nil { + l.Error().Err(err).Msg("failed to set deployment global error") + } + _, _ = e.queue.Dequeue() + + continue + } + + l.Debug().Msg("contact validation pass") + } + + switch job.Op { + case opProvisionNoValidation: + fallthrough + case opProvision: + e.installDeployment(ctx, &job.Target) + case opDeprovision: + e.uninstallDeployment(ctx, &job.Target, job.Message) + case opPause: + e.lockDeployment(ctx, &job.Target) + case opResume: + e.unlockDeployment(ctx, &job.Target) + case opUpdate: + // update is tricky because we need to work against + // 2 versions of the object. Once that reflects the current state + // and the new one that is the target state but it does not know + // the current state of already deployed workloads + // so (1st) we need to get the difference + // this call will return 3 lists + // - things to remove + // - things to add + // - things to update (not supported atm) + // - things that is not in any of the 3 lists are basically stay as is + // the call will also make sure the Result of those workload in both the (did not change) + // and update to reflect the current result on those workloads. 
+ update, err := job.Source.Upgrade(&job.Target) + if err != nil { + l.Error().Err(err).Msg("failed to get update procedure") + break + } + e.updateDeployment(ctx, update) + } + + _, err = e.queue.Dequeue() + if err != nil { + l.Error().Err(err).Msg("failed to dequeue job") + } + + e.safeCallback(&job.Target, job.Op == opDeprovision) + } +} + +func (e *NativeEngine) safeCallback(d *gridtypes.Deployment, delete bool) { + if e.callback == nil { + return + } + // in case callback panics we don't want to kill the engine + defer func() { + if err := recover(); err != nil { + log.Error().Msgf("panic while processing callback: %v", err) + } + }() + + e.callback(d.TwinID, d.ContractID, delete) +} + +// validate validates and injects the deployment contracts is substrate is configured +// for this instance of the provision engine. If noValidation is set contracts checks is skipped +func (e *NativeEngine) validate(ctx context.Context, dl *gridtypes.Deployment, noValidation bool) (context.Context, error) { + if e.registrarGateway == nil { + return ctx, fmt.Errorf("substrate is not configured in engine") + } + + contract, subErr := e.registrarGateway.GetContract(ctx, dl.ContractID) + if subErr.IsError() { + return nil, errors.Wrap(subErr.Err, "failed to get deployment contract") + } + + if !contract.ContractType.IsNodeContract { + return nil, fmt.Errorf("invalid contract type, expecting node contract") + } + ctx = withContract(ctx, contract.ContractType.NodeContract) + + rent, subErr := e.registrarGateway.GetNodeRentContract(ctx, uint32(e.nodeID)) + if subErr.IsError() && !subErr.IsCode(pkg.CodeNotFound) { + return nil, fmt.Errorf("failed to check node rent state") + } + + ctx = withRented(ctx, !subErr.IsError() && rent != 0) + + if noValidation { + return ctx, nil + } + + if uint32(contract.ContractType.NodeContract.Node) != uint32(e.nodeID) { + return nil, fmt.Errorf("invalid node address in contract") + } + + hash, err := dl.ChallengeHash() + if err != nil { + return nil, errors.Wrap(err, "failed to compute deployment hash") + } + + if contract.ContractType.NodeContract.DeploymentHash.String() != hex.EncodeToString(hash) { + return nil, fmt.Errorf("contract hash does not match deployment hash") + } + + return ctx, nil +} + +// boot will make sure to re-deploy all stored reservation +// on boot. +func (e *NativeEngine) boot(root context.Context) error { + storage := e.Storage() + twins, err := storage.Twins() + if err != nil { + return errors.Wrap(err, "failed to list twins") + } + for _, twin := range twins { + ids, err := storage.ByTwin(twin) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Msg("failed to list deployments for twin") + continue + } + + for _, id := range ids { + dl, err := storage.Get(twin, id) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Uint64("id", id).Msg("failed to load deployment") + continue + } + // unfortunately we have to inject this value here + // since the boot runs outside the engine queue. + + if !dl.IsActive() { + continue + } + + job := engineJob{ + Target: dl, + Op: opProvisionNoValidation, + } + + if err := e.queue.Enqueue(&job); err != nil { + log.Error(). + Err(err). + Uint32("twin", dl.TwinID). + Uint64("dl", dl.ContractID). + Msg("failed to queue deployment for processing") + } + } + } + + return nil +} + +func (e *NativeEngine) uninstallWorkload(ctx context.Context, wl *gridtypes.WorkloadWithID, reason string) error { + twin, deployment, name, _ := wl.ID.Parts() + log := log.With(). + Uint32("twin", twin). 
+ Uint64("deployment", deployment). + Stringer("name", name). + Str("type", wl.Type.String()). + Logger() + + _, err := e.storage.Current(twin, deployment, name) + if errors.Is(err, ErrWorkloadNotExist) { + return nil + } else if err != nil { + return err + } + + log.Debug().Str("workload", string(wl.Name)).Msg("de-provisioning") + + result := gridtypes.Result{ + State: gridtypes.StateDeleted, + Error: reason, + } + if err := e.provisioner.Deprovision(ctx, wl); err != nil { + log.Error().Err(err).Stringer("id", wl.ID).Msg("failed to uninstall workload") + result.State = gridtypes.StateError + result.Error = err.Error() + } + + result.Created = gridtypes.Timestamp(time.Now().Unix()) + + if err := e.storage.Transaction(twin, deployment, wl.Workload.WithResults(result)); err != nil { + return err + } + + if result.State == gridtypes.StateDeleted { + return e.storage.Remove(twin, deployment, name) + } + + return nil +} + +func (e *NativeEngine) installWorkload(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + // this workload is already deleted or in error state + // we don't try again + twin, deployment, name, _ := wl.ID.Parts() + + current, err := e.storage.Current(twin, deployment, name) + if errors.Is(err, ErrWorkloadNotExist) { + // this can happen if installWorkload was called upon a deployment update operation + // so this is a totally new workload that was not part of the original deployment + // hence a call to Add is needed + if err := e.storage.Add(twin, deployment, *wl.Workload); err != nil { + return errors.Wrap(err, "failed to add workload to storage") + } + } else if err != nil { + // another error + return errors.Wrapf(err, "failed to get last transaction for '%s'", wl.ID.String()) + } else { + // workload exists, but we trying to re-install it so this might be + // after a reboot. hence we need to check last state. + // if it has been deleted, error state, we do nothing. + // otherwise, we-reinstall it + if current.Result.State.IsAny(gridtypes.StateDeleted, gridtypes.StateError) { + // nothing to do! + return nil + } + } + + log := log.With(). + Uint32("twin", twin). + Uint64("deployment", deployment). + Stringer("name", wl.Name). + Str("type", wl.Type.String()). + Logger() + + log.Debug().Msg("provisioning") + result, err := e.provisioner.Provision(ctx, wl) + if errors.Is(err, ErrNoActionNeeded) { + // workload already exist, so no need to create a new transaction + return nil + } else if err != nil { + result.Created = gridtypes.Now() + result.State = gridtypes.StateError + result.Error = err.Error() + } + + if result.State == gridtypes.StateError { + log.Error().Str("error", result.Error).Msg("failed to deploy workload") + } + + return e.storage.Transaction( + twin, + deployment, + wl.Workload.WithResults(result)) +} + +func (e *NativeEngine) updateWorkload(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + twin, deployment, name, _ := wl.ID.Parts() + log := log.With(). + Uint32("twin", twin). + Uint64("deployment", deployment). + Stringer("name", name). + Str("type", wl.Type.String()). + Logger() + + log.Debug().Msg("provisioning") + + var result gridtypes.Result + var err error + if e.provisioner.CanUpdate(ctx, wl.Type) { + result, err = e.provisioner.Update(ctx, wl) + } else { + // deprecated. 
We should never update resources by decommission and then provision + // the check in Update method should prevent this + // #unreachable + err = fmt.Errorf("can not update this workload type") + } + + if errors.Is(err, ErrNoActionNeeded) { + currentWl, err := e.storage.Current(twin, deployment, name) + if err != nil { + return err + } + result = currentWl.Result + } else if err != nil { + return err + } + + return e.storage.Transaction(twin, deployment, wl.Workload.WithResults(result)) +} + +func (e *NativeEngine) lockWorkload(ctx context.Context, wl *gridtypes.WorkloadWithID, lock bool) error { + // this workload is already deleted or in error state + // we don't try again + twin, deployment, name, _ := wl.ID.Parts() + + current, err := e.storage.Current(twin, deployment, name) + if err != nil { + // another error + return errors.Wrapf(err, "failed to get last transaction for '%s'", wl.ID.String()) + } else { + if !current.Result.State.IsOkay() { + // nothing to do! it's either in error state or something else. + return nil + } + } + + log := log.With(). + Uint32("twin", twin). + Uint64("deployment", deployment). + Stringer("name", wl.Name). + Str("type", wl.Type.String()). + Bool("lock", lock). + Logger() + + log.Debug().Msg("setting locking on workload") + action := e.provisioner.Resume + if lock { + action = e.provisioner.Pause + } + result, err := action(ctx, wl) + if errors.Is(err, ErrNoActionNeeded) { + // workload already exist, so no need to create a new transaction + return nil + } else if err != nil { + return err + } + + if result.State == gridtypes.StateError { + log.Error().Str("error", result.Error).Msg("failed to set locking on workload") + } + + return e.storage.Transaction( + twin, + deployment, + wl.Workload.WithResults(result)) +} + +func (e *NativeEngine) uninstallDeployment(ctx context.Context, dl *gridtypes.Deployment, reason string) { + var errors bool + for i := len(e.order) - 1; i >= 0; i-- { + typ := e.order[i] + + workloads := dl.ByType(typ) + for _, wl := range workloads { + if err := e.uninstallWorkload(ctx, wl, reason); err != nil { + errors = true + log.Error().Err(err).Stringer("id", wl.ID).Msg("failed to un-install workload") + } + } + } + + if errors { + return + } + + if err := e.storage.Delete(dl.TwinID, dl.ContractID); err != nil { + log.Error().Err(err). + Uint32("twin", dl.TwinID). + Uint64("contract", dl.ContractID). 
+ Msg("failed to delete deployment") + } +} + +func getMountSize(wl *gridtypes.Workload) (gridtypes.Unit, error) { + data, err := wl.WorkloadData() + if err != nil { + return 0, err + } + switch d := data.(type) { + case *zos.ZMount: + return d.Size, nil + case *zos.Volume: + return d.Size, nil + default: + return 0, fmt.Errorf("failed to get workload as zmount or volume '%v'", data) + } +} + +func sortMountWorkloads(workloads []*gridtypes.WorkloadWithID) { + sort.Slice(workloads, func(i, j int) bool { + sizeI, err := getMountSize(workloads[i].Workload) + if err != nil { + return false + } + + sizeJ, err := getMountSize(workloads[j].Workload) + if err != nil { + return false + } + + return sizeI > sizeJ + }) +} + +func (e *NativeEngine) installDeployment(ctx context.Context, getter gridtypes.WorkloadGetter) { + for _, typ := range e.order { + workloads := getter.ByType(typ) + + if typ == zos.ZMountType || typ == zos.VolumeType { + sortMountWorkloads(workloads) + } + + for _, wl := range workloads { + if err := e.installWorkload(ctx, wl); err != nil { + log.Error().Err(err).Stringer("id", wl.ID).Msg("failed to install workload") + } + } + } +} + +func (e *NativeEngine) lockDeployment(ctx context.Context, getter gridtypes.WorkloadGetter) { + for i := len(e.order) - 1; i >= 0; i-- { + typ := e.order[i] + + workloads := getter.ByType(typ) + + for _, wl := range workloads { + if err := e.lockWorkload(ctx, wl, true); err != nil { + log.Error().Err(err).Stringer("id", wl.ID).Msg("failed to lock workload") + } + } + } +} + +func (e *NativeEngine) unlockDeployment(ctx context.Context, getter gridtypes.WorkloadGetter) { + for _, typ := range e.order { + workloads := getter.ByType(typ) + + for _, wl := range workloads { + if err := e.lockWorkload(ctx, wl, false); err != nil { + log.Error().Err(err).Stringer("id", wl.ID).Msg("failed to unlock workload") + } + } + } +} + +// sortOperations sortes the operations, removes first in reverse type order, then upgrades/creates in type order +func (e *NativeEngine) sortOperations(ops []gridtypes.UpgradeOp) { + // maps an operation to an integer, less comes first in sorting + opMap := func(op gridtypes.UpgradeOp) int { + if op.Op == gridtypes.OpRemove { + // removes are negative (typeIndex starts from 1) so they are always before creations/updates + // negated to apply in reverse order + return -e.typeIndex[op.WlID.Type] + } else { + // updates/creates are considered the same + return e.typeIndex[op.WlID.Type] + } + } + sort.SliceStable(ops, func(i, j int) bool { + return opMap(ops[i]) < opMap(ops[j]) + }) +} + +func (e *NativeEngine) updateDeployment(ctx context.Context, ops []gridtypes.UpgradeOp) (changed bool) { + e.sortOperations(ops) + for _, op := range ops { + var err error + switch op.Op { + case gridtypes.OpRemove: + err = e.uninstallWorkload(ctx, op.WlID, "deleted by an update") + case gridtypes.OpAdd: + err = e.installWorkload(ctx, op.WlID) + case gridtypes.OpUpdate: + err = e.updateWorkload(ctx, op.WlID) + } + + if err != nil { + log.Error().Err(err).Stringer("id", op.WlID.ID).Stringer("operation", op.Op).Msg("error while updating deployment") + } + } + return +} + +// DecommissionCached implements the zbus interface +func (e *NativeEngine) DecommissionCached(id string, reason string) error { + globalID := gridtypes.WorkloadID(id) + twin, dlID, name, err := globalID.Parts() + if err != nil { + return err + } + wl, err := e.storage.Current(twin, dlID, name) + if err != nil { + return err + } + + if wl.Result.State == gridtypes.StateDeleted || + 
wl.Result.State == gridtypes.StateError { + // nothing to do! + return nil + } + + // to bad we have to repeat this here + ctx := context.WithValue(context.Background(), engineKey{}, e) + ctx = withDeployment(ctx, twin, dlID) + + ctx, cancel := context.WithTimeout(ctx, 3*time.Minute) + defer cancel() + + err = e.uninstallWorkload(ctx, &gridtypes.WorkloadWithID{Workload: &wl, ID: globalID}, + fmt.Sprintf("workload decommissioned by system, reason: %s", reason), + ) + + return err +} + +func (n *NativeEngine) CreateOrUpdate(twin uint32, deployment gridtypes.Deployment, update bool) error { + if err := deployment.Valid(); err != nil { + return err + } + + if deployment.TwinID != twin { + return fmt.Errorf("twin id mismatch (deployment: %d, message: %d)", deployment.TwinID, twin) + } + + // make sure the account used is verified + check := func() error { + if ok, err := isTwinVerified(twin); err != nil { + return err + } else if !ok { + return fmt.Errorf("user with twin id %d is not verified", twin) + } + return nil + } + + if err := backoff.Retry(check, backoff.WithMaxRetries(backoff.NewExponentialBackOff(), 5)); err != nil { + return err + } + + if err := deployment.Verify(n.twins); err != nil { + return err + } + + // we need to ge the contract here and make sure + // we can validate the contract against it. + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + action := n.Provision + if update { + action = n.Update + } + + return action(ctx, deployment) +} + +func (n *NativeEngine) Get(twin uint32, contractID uint64) (gridtypes.Deployment, error) { + deployment, err := n.storage.Get(twin, contractID) + if errors.Is(err, ErrDeploymentNotExists) { + return gridtypes.Deployment{}, fmt.Errorf("deployment not found") + } else if err != nil { + return gridtypes.Deployment{}, err + } + + return deployment, nil +} + +func (n *NativeEngine) List(twin uint32) ([]gridtypes.Deployment, error) { + deploymentIDs, err := n.storage.ByTwin(twin) + if err != nil { + return nil, err + } + deployments := make([]gridtypes.Deployment, 0) + for _, id := range deploymentIDs { + deployment, err := n.storage.Get(twin, id) + if err != nil { + return nil, err + } + if !deployment.IsActive() { + continue + } + deployments = append(deployments, deployment) + } + return deployments, nil +} + +func (n *NativeEngine) Changes(twin uint32, contractID uint64) ([]gridtypes.Workload, error) { + changes, err := n.storage.Changes(twin, contractID) + if errors.Is(err, ErrDeploymentNotExists) { + return nil, fmt.Errorf("deployment not found") + } else if err != nil { + return nil, err + } + return changes, nil +} + +func (n *NativeEngine) ListPublicIPs() ([]string, error) { + // for efficiency this method should just find out configured public Ips. + // but currently the only way to do this is by scanning the nft rules + // another less efficient but good for now solution is to scan all + // reservations and find the ones with public IPs. 
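// A minimal illustrative sketch (editor's example): other modules reach these
// methods over zbus through the stub generated by the go:generate directives in
// pkg/provision.go. The constructor name NewProvisionStub and the leading
// context argument are assumptions based on how the other generated stubs in
// this patch are used; they are not confirmed by the diff itself.
func listActiveDeployments(ctx context.Context, client zbus.Client, twin uint32) ([]gridtypes.Deployment, error) {
	provisionStub := stubs.NewProvisionStub(client)

	deployments, err := provisionStub.List(ctx, twin)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to list deployments of twin %d", twin)
	}
	return deployments, nil
}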
+ + twins, err := n.storage.Twins() + if err != nil { + return nil, errors.Wrap(err, "failed to list twins") + } + ips := make([]string, 0) + for _, twin := range twins { + deploymentsIDs, err := n.storage.ByTwin(twin) + if err != nil { + return nil, errors.Wrap(err, "failed to list twin deployment") + } + for _, id := range deploymentsIDs { + deployment, err := n.storage.Get(twin, id) + if err != nil { + return nil, errors.Wrap(err, "failed to load deployment") + } + workloads := deployment.ByType(zos.PublicIPv4Type, zos.PublicIPType) + + for _, workload := range workloads { + if workload.Result.State != gridtypes.StateOk { + continue + } + + var result zos.PublicIPResult + if err := workload.Result.Unmarshal(&result); err != nil { + return nil, err + } + + if result.IP.IP != nil { + ips = append(ips, result.IP.String()) + } + } + } + } + + return ips, nil +} + +func (n *NativeEngine) ListPrivateIPs(twin uint32, network gridtypes.Name) ([]string, error) { + deployments, err := n.List(twin) + if err != nil { + return nil, err + } + ips := make([]string, 0) + for _, deployment := range deployments { + vms := deployment.ByType(zos.ZMachineType) + for _, vm := range vms { + if vm.Result.State.IsAny(gridtypes.StateDeleted, gridtypes.StateError) { + continue + } + data, err := vm.WorkloadData() + if err != nil { + return nil, err + } + zmachine := data.(*zos.ZMachine) + for _, inf := range zmachine.Network.Interfaces { + if inf.Network == network { + ips = append(ips, inf.IP.String()) + } + } + } + } + return ips, nil +} + +func isNotFoundError(err error) bool { + if errors.Is(err, ErrWorkloadNotExist) || errors.Is(err, ErrDeploymentNotExists) { + return true + } + return false +} + +// GetWorkloadStatus get workload status, returns status, exists, error +func (e *NativeEngine) GetWorkloadStatus(id string) (gridtypes.ResultState, bool, error) { + globalID := gridtypes.WorkloadID(id) + twin, dlID, name, err := globalID.Parts() + if err != nil { + return "", false, err + } + + wl, err := e.storage.Current(twin, dlID, name) + + if isNotFoundError(err) { + return "", false, nil + } else if err != nil { + return "", false, err + } + + return wl.Result.State, true, nil +} + +// isTwinVerified make sure the account used is verified +func isTwinVerified(twinID uint32) (verified bool, err error) { + const verifiedStatus = "VERIFIED" + env := environment.MustGet() + + verificationServiceURL, err := url.JoinPath(env.KycURL, "/api/v1/status") + if err != nil { + return + } + + request, err := http.NewRequest(http.MethodGet, verificationServiceURL, nil) + if err != nil { + return + } + + q := request.URL.Query() + q.Set("twin_id", fmt.Sprint(twinID)) + request.URL.RawQuery = q.Encode() + + cl := retryablehttp.NewClient() + cl.HTTPClient.Timeout = defaultHttpTimeout + cl.RetryMax = 5 + + response, err := cl.StandardClient().Do(request) + if err != nil { + return + } + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + return verified, errors.New("failed to get twin verification status") + } + + var result struct{ Result struct{ Status string } } + + err = json.NewDecoder(response.Body).Decode(&result) + if err != nil { + return + } + + return result.Result.Status == verifiedStatus, nil +} diff --git a/pkg/provision/engine_test.go b/pkg/provision/engine_test.go new file mode 100644 index 00000000..179bc0b8 --- /dev/null +++ b/pkg/provision/engine_test.go @@ -0,0 +1,184 @@ +package provision + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + 
"github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +// func TestEngine(t *testing.T) { +// td, err := ioutil.TempDir("", "") +// require.NoError(t, err) +// defer t.Cleanup(func() { +// os.RemoveAll(td) +// }) + +// nodeID := "BhPhHVhfU8qerzzh1BGBgcQ7SQxQtND3JwuxSPoRzqkY" +// store := &FSStore{ +// root: td, +// } + +// engine := &defaultEngine{ +// nodeID: nodeID, +// store: store, +// } + +// mustJSONMarshal := func(v interface{}) []byte { +// b, err := json.Marshal(v) +// require.NoError(t, err) +// return b +// } + +// err = engine.store.Add(&Reservation{ +// ID: "1-1", +// Type: VolumeReservation, +// Data: mustJSONMarshal(Volume{ +// Size: 1, +// Type: HDDDiskType, +// }), +// }) +// require.NoError(t, err) + +// err = engine.store.Add(&Reservation{ +// ID: "3-1", +// Type: ZDBReservation, +// Data: mustJSONMarshal(ZDB{ +// Size: 15, +// Mode: pkg.ZDBModeSeq, +// DiskType: pkg.SSDDevice, +// }), +// }) +// require.NoError(t, err) + +// err = engine.store.Add(&Reservation{ +// ID: "4-1", +// Type: ContainerReservation, +// Data: mustJSONMarshal(Container{ +// Capacity: ContainerCapacity{ +// CPU: 2, +// Memory: 4096, +// }, +// }), +// }) +// require.NoError(t, err) + +// resources, workloads := engine.capacityUsed() +// assert.Equal(t, uint64(2), resources.Cru) +// assert.Equal(t, float64(4), resources.Mru) +// assert.Equal(t, float64(15.25), resources.Sru) +// assert.Equal(t, float64(1), resources.Hru) + +// assert.EqualValues(t, 1, workloads.Container) +// assert.EqualValues(t, 0, workloads.Network) +// assert.EqualValues(t, 1, workloads.Volume) +// assert.EqualValues(t, 1, workloads.ZDBNamespace) +// assert.EqualValues(t, 0, workloads.K8sVM) +// } + +func TestGetMountSize(t *testing.T) { + t.Run("invalid type", func(t *testing.T) { + wl := gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{Type: "invalid"}, + } + _, err := getMountSize(wl.Workload) + assert.Error(t, err) + }) + t.Run("different data type", func(t *testing.T) { + wl := gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{Type: zos.ZDBType, Data: json.RawMessage(`{"size": 10}`)}, + } + _, err := getMountSize(wl.Workload) + assert.Error(t, err) + }) + t.Run("valid data", func(t *testing.T) { + wl := gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{Type: zos.ZMountType, Data: json.RawMessage(`{"size": 10}`)}, + } + size, err := getMountSize(wl.Workload) + assert.NoError(t, err) + assert.Equal(t, size, gridtypes.Unit(10)) + }) + t.Run("volumes", func(t *testing.T) { + wl := gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{Type: zos.VolumeType, Data: json.RawMessage(`{"size": 10}`)}, + } + size, err := getMountSize(wl.Workload) + assert.NoError(t, err) + assert.Equal(t, size, gridtypes.Unit(10)) + }) + +} + +func TestSortMountWorkloads(t *testing.T) { + t.Run("zmounts", func(t *testing.T) { + workloads := []*gridtypes.WorkloadWithID{ + {Workload: &gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 10}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 30}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 20}`), + }}, + } + + expectedWorkloads := []*gridtypes.WorkloadWithID{ + {Workload: &gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 30}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 20}`), + }}, + {Workload: 
&gridtypes.Workload{ + Type: zos.ZMountType, + Data: json.RawMessage(`{"size": 10}`), + }}, + } + + sortMountWorkloads(workloads) + assert.Equal(t, expectedWorkloads, workloads) + }) + t.Run("volumes", func(t *testing.T) { + workloads := []*gridtypes.WorkloadWithID{ + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 10}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 30}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 20}`), + }}, + } + + expectedWorkloads := []*gridtypes.WorkloadWithID{ + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 30}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 20}`), + }}, + {Workload: &gridtypes.Workload{ + Type: zos.VolumeType, + Data: json.RawMessage(`{"size": 10}`), + }}, + } + + sortMountWorkloads(workloads) + assert.Equal(t, expectedWorkloads, workloads) + }) +} diff --git a/pkg/provision/interface.go b/pkg/provision/interface.go new file mode 100644 index 00000000..ce3ef37d --- /dev/null +++ b/pkg/provision/interface.go @@ -0,0 +1,149 @@ +package provision + +import ( + "context" + "fmt" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +// Twins is used to get twin public key +type Twins interface { + GetKey(id uint32) ([]byte, error) +} + +// Engine is engine interface +type Engine interface { + // Provision pushes a workload to engine queue. on success + // means that workload has been committed to storage (accepts) + // and will be processes later + Provision(ctx context.Context, wl gridtypes.Deployment) error + Deprovision(ctx context.Context, twin uint32, id uint64, reason string) error + Pause(ctx context.Context, twin uint32, id uint64) error + Resume(ctx context.Context, twin uint32, id uint64) error + Update(ctx context.Context, update gridtypes.Deployment) error + Storage() Storage + Twins() Twins + Admins() Twins +} + +// Provisioner interface. the errors returned by this interface are associated with +// provisioner errors, not workloads errors. The difference is, a failure to recognize the +// workload type for example, is a provisioner error. A workload error is when the workload +// fails to deploy and this is returned as Error state in the Result object (but nil error) +// Methods can return special error type ErrDidNotChange which instructs the engine that the +// workload provision was not carried on because it's already deployed, basically a no action +// needed indicator. 
In that case, the engine can ignore the returned result +type Provisioner interface { + // Initialize is called before the provision engine is started + Initialize(ctx context.Context) error + // Provision a workload + Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) + // Deprovision a workload + Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error + // Pause a workload + Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) + // Resume a workload + Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) + // Update a workload + Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) + // CanUpdate checks if this workload can be updated on the fly + CanUpdate(ctx context.Context, typ gridtypes.WorkloadType) bool +} + +// Filter is filtering function for Purge method + +var ( + // ErrDeploymentExists returned if object exist + ErrDeploymentExists = fmt.Errorf("exists") + // ErrWorkloadExists returned if object exist + ErrWorkloadExists = fmt.Errorf("exists") + // ErrDeploymentConflict returned if deployment cannot be stored because + // it conflicts with another deployment + ErrDeploymentConflict = fmt.Errorf("conflict") + // ErrDeploymentNotExists returned if object not exists + ErrDeploymentNotExists = fmt.Errorf("deployment does not exist") + // ErrWorkloadNotExist returned by storage if workload does not exist + ErrWorkloadNotExist = fmt.Errorf("workload does not exist") + // ErrNoActionNeeded can be returned by any provision method to indicate that + // no action has been taken in case a workload is already deployed and the + // engine then can skip updating the result of the workload. + // When returned, the data returned by the provision is ignored + ErrNoActionNeeded = fmt.Errorf("no action needed") + // ErrDeploymentUpgradeValidationError error, is returned if the deployment + // failed to compute upgrade steps + ErrDeploymentUpgradeValidationError = fmt.Errorf("upgrade validation error") + // ErrInvalidVersion invalid version error + ErrInvalidVersion = fmt.Errorf("invalid version") +) + +// Field interface +type ( + Field interface{} + VersionField struct { + Version uint32 + } +) + +type DescriptionField struct { + Description string +} + +type MetadataField struct { + Metadata string +} + +type SignatureRequirementField struct { + SignatureRequirement gridtypes.SignatureRequirement +} + +type StorageCapacity struct { + // Cap is total reserved capacity as per all active workloads + Cap gridtypes.Capacity + // Deployments is a list with all deployments that are active + Deployments []gridtypes.Deployment + // Workloads the total number of all workloads + Workloads int + // LastDeploymentTimestamp last deployment timestamp + LastDeploymentTimestamp gridtypes.Timestamp +} + +// Used with Storage interface to compute capacity, exclude any deployment +// and or workload that returns true from the capacity calculation. +type Exclude = func(dl *gridtypes.Deployment, wl *gridtypes.Workload) bool + +// Storage interface +type Storage interface { + // Create a new deployment in storage, it sets the initial transactions + // for all workloads to "init" and the correct creation time. + Create(deployment gridtypes.Deployment) error + // Update updates a deployment fields + Update(twin uint32, deployment uint64, fields ...Field) error + // Delete deletes a deployment from storage. 
+ Delete(twin uint32, deployment uint64) error + // Get gets the current state of a deployment from storage + Get(twin uint32, deployment uint64) (gridtypes.Deployment, error) + // Error sets global deployment error + Error(twin uint32, deployment uint64, err error) error + // Add workload to deployment, if no active deployment exists with same name + Add(twin uint32, deployment uint64, workload gridtypes.Workload) error + // Remove a workload from deployment. + Remove(twin uint32, deployment uint64, name gridtypes.Name) error + // Transaction append a transaction to deployment transactions logs + Transaction(twin uint32, deployment uint64, workload gridtypes.Workload) error + // Changes return all the historic transactions of a deployment + Changes(twin uint32, deployment uint64) (changes []gridtypes.Workload, err error) + // Current gets last state of a workload by name + Current(twin uint32, deployment uint64, name gridtypes.Name) (gridtypes.Workload, error) + // Twins list twins in storage + Twins() ([]uint32, error) + // ByTwin return list of deployments for a twin + ByTwin(twin uint32) ([]uint64, error) + // return total capacity and active deployments + Capacity(exclude ...Exclude) (StorageCapacity, error) +} + +// Janitor interface +type Janitor interface { + Cleanup(ctx context.Context) error +} diff --git a/pkg/provision/mw/action.go b/pkg/provision/mw/action.go new file mode 100644 index 00000000..e05bce89 --- /dev/null +++ b/pkg/provision/mw/action.go @@ -0,0 +1,163 @@ +package mw + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/rs/zerolog/log" +) + +// Response interface +type Response interface { + Status() int + Err() error + + // header getter + Header() http.Header + // header setter + WithHeader(k, v string) Response +} + +// Action interface +type Action func(r *http.Request) (interface{}, Response) + +// AsHandlerFunc is a helper wrapper to make implementing actions easier +func AsHandlerFunc(a Action) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + defer func() { + _, _ = io.ReadAll(r.Body) + _ = r.Body.Close() + }() + + object, result := a(r) + + w.Header().Set("Content-Type", "application/json") + + if result == nil { + w.WriteHeader(http.StatusOK) + } else { + + h := result.Header() + for k := range h { + for _, v := range h.Values(k) { + w.Header().Add(k, v) + } + } + + w.WriteHeader(result.Status()) + if err := result.Err(); err != nil { + log.Error().Msgf("%s", err.Error()) + object = struct { + Error string `json:"error"` + }{ + Error: err.Error(), + } + } + } + + if err := json.NewEncoder(w).Encode(object); err != nil { + log.Error().Err(err).Msg("failed to encode return object") + } + } +} + +type genericResponse struct { + status int + err error + header http.Header +} + +func (r genericResponse) Status() int { + return r.status +} + +func (r genericResponse) Err() error { + return r.err +} + +func (r genericResponse) Header() http.Header { + if r.header == nil { + r.header = http.Header{} + } + return r.header +} + +func (r genericResponse) WithHeader(k, v string) Response { + if r.header == nil { + r.header = http.Header{} + } + + r.header.Add(k, v) + return r +} + +// Created return a created response +func Created() Response { + return genericResponse{status: http.StatusCreated} +} + +// Ok return a ok response +func Ok() Response { + return genericResponse{status: http.StatusOK} +} + +// Error generic error response +func Error(err error, code ...int) Response { + status := 
http.StatusInternalServerError + if len(code) > 0 { + status = code[0] + } + + if err == nil { + err = fmt.Errorf("no message") + } + + return genericResponse{status: status, err: err} +} + +// BadRequest result +func BadRequest(err error) Response { + return Error(err, http.StatusBadRequest) +} + +// PaymentRequired result +func PaymentRequired(err error) Response { + return Error(err, http.StatusPaymentRequired) +} + +// NotFound response +func NotFound(err error) Response { + return Error(err, http.StatusNotFound) +} + +// Conflict response +func Conflict(err error) Response { + return Error(err, http.StatusConflict) +} + +// UnAuthorized response +func UnAuthorized(err error) Response { + return Error(err, http.StatusUnauthorized) +} + +// Forbidden response +func Forbidden(err error) Response { + return Error(err, http.StatusForbidden) +} + +// NoContent response +func NoContent() Response { + return genericResponse{status: http.StatusNoContent} +} + +// Accepted response +func Accepted() Response { + return genericResponse{status: http.StatusAccepted} +} + +// Unavailable returned when server is too busy +func Unavailable(err error) Response { + return Error(err, http.StatusServiceUnavailable) +} diff --git a/pkg/provision/mw/auth.go b/pkg/provision/mw/auth.go new file mode 100644 index 00000000..d8415fda --- /dev/null +++ b/pkg/provision/mw/auth.go @@ -0,0 +1,125 @@ +package mw + +import ( + "context" + "crypto/ed25519" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "strconv" + "time" + + "github.com/gorilla/mux" + "github.com/lestrrat-go/jwx/jwa" + "github.com/lestrrat-go/jwx/jwt" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zos4/pkg/provision" +) + +type ( + twinPublicKeyID struct{} + twinKeyID struct{} +) + +// TwinPublicKey extracts twin public key from request +func TwinPublicKey(ctx context.Context) ed25519.PublicKey { + value := ctx.Value(twinPublicKeyID{}) + return value.(ed25519.PublicKey) +} + +// TwinID extracts twin id from request +func TwinID(ctx context.Context) uint32 { + value := ctx.Value(twinKeyID{}) + return value.(uint32) +} + +// UserMap implements provision.Twins for the users collections +type UserMap map[uint32]ed25519.PublicKey + +// NewUserMap create a new UserMap that uses the users collection +// to find the key +func NewUserMap() UserMap { + return UserMap{} +} + +// AddKeyFromHex adds a user key to map from a hex string +func (u UserMap) AddKeyFromHex(id uint32, key string) error { + k, err := hex.DecodeString(key) + if err != nil { + return err + } + u[id] = ed25519.PublicKey(k) + return nil +} + +// GetKey implements interface +func (u UserMap) GetKey(id uint32) ([]byte, error) { + key, ok := u[id] + if !ok { + return nil, fmt.Errorf("unknown user id '%d' in key map", id) + } + return key, nil +} + +func writeError(w http.ResponseWriter, err error) { + w.WriteHeader(http.StatusUnauthorized) + + object := struct { + Error string `json:"error"` + }{ + Error: err.Error(), + } + if err := json.NewEncoder(w).Encode(object); err != nil { + log.Error().Err(err).Msg("failed to encode return object") + } +} + +// NewAuthMiddleware creates a new AuthMiddleware using jwt signed by the caller +func NewAuthMiddleware(users provision.Twins) mux.MiddlewareFunc { + return func(handler http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + token, err := jwt.ParseHeader(r.Header, "authorization", + jwt.WithValidate(true), + jwt.WithAudience("zos"), + 
jwt.WithAcceptableSkew(10*time.Second), + ) + if err != nil { + writeError(w, errors.Wrap(err, "failed to parse jwt token")) + return + } + + if time.Until(token.Expiration()) > 2*time.Minute { + writeError(w, fmt.Errorf("the expiration date should not be more than 2 minutes")) + return + } + twinID, err := strconv.ParseUint(token.Issuer(), 10, 32) + if err != nil { + writeError(w, errors.Wrap(err, "failed to parse issued id, expecting a 32 bit uint")) + return + } + pk, err := users.GetKey(uint32(twinID)) + if err != nil { + writeError(w, errors.Wrap(err, "failed to get twin public key")) + return + } + // reparse the token but with signature validation + _, err = jwt.ParseHeader(r.Header, "authorization", jwt.WithValidate(true), + jwt.WithAudience("zos"), + jwt.WithAcceptableSkew(10*time.Second), + jwt.WithVerify(jwa.EdDSA, pk), + ) + if err != nil { + writeError(w, errors.Wrap(err, "failed to get twin public key")) + return + } + + ctx := r.Context() + ctx = context.WithValue(ctx, twinKeyID{}, uint32(twinID)) + ctx = context.WithValue(ctx, twinPublicKeyID{}, pk) + + handler.ServeHTTP(w, r.WithContext(ctx)) + }) + } +} diff --git a/pkg/provision/provisiner.go b/pkg/provision/provisiner.go new file mode 100644 index 00000000..9729848c --- /dev/null +++ b/pkg/provision/provisiner.go @@ -0,0 +1,260 @@ +package provision + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/pkg/errors" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +// Response interface for custom error responses +// you never need to implement this interface +// can only be returned by one of the methods in this +// module. + +type Response interface { + error + state() gridtypes.ResultState + err() error +} + +type response struct { + s gridtypes.ResultState + e error +} + +func (r *response) Error() string { + if err := r.err(); err != nil { + return err.Error() + } + + return "" +} + +func (r *response) Unwrap() error { + return r.e +} + +func (r *response) state() gridtypes.ResultState { + return r.s +} + +func (r *response) err() error { + return r.e +} + +// Ok response. you normally don't need to return +// this from Manager methods. instead returning `nil` error +// is preferred. +func Ok() Response { + return &response{s: gridtypes.StateOk} +} + +// UnChanged is a special response status that states that an operation has failed +// but this did not affect the workload status. Usually during an update when the +// update could not carried out, but the workload is still running correctly with +// previous config +func UnChanged(cause error) Response { + return &response{s: gridtypes.StateUnChanged, e: cause} +} + +func Paused() Response { + return &response{s: gridtypes.StatePaused, e: fmt.Errorf("paused")} +} + +// Manager defines basic type manager functionality. This interface +// declares the provision and the deprovision method which is required +// by any Type manager. +type Manager interface { + Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) + Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error +} + +// Initializer interface define an extra Initialize method which is run on the provisioner +// before the provision engine is started. +type Initializer interface { + Initialize(ctx context.Context) error +} + +// Updater defines the optional Update method for a type manager. 
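// A rough sketch of what such a manager could look like (illustrative only;
// myManager and its apply method are assumed names, not part of this package):
//
//	func (m *myManager) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) {
//		if err := m.apply(ctx, wl); err != nil {
//			// the update failed, but the workload keeps running with its old config
//			return nil, provision.UnChanged(err)
//		}
//		return nil, nil
//	}
//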
Types are allowed +// to implement update to change their settings on the fly +type Updater interface { + Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) +} + +// Pauser defines optional Pause, Resume method for type managers. Types are allowed +// to implement pause, resume to put the workload in paused state where it's not usable +// by the user but at the same time not completely deleted. +type Pauser interface { + Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) error + Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) error +} + +type mapProvisioner struct { + managers map[gridtypes.WorkloadType]Manager +} + +// NewMapProvisioner returns a new instance of a map provisioner +func NewMapProvisioner(managers map[gridtypes.WorkloadType]Manager) Provisioner { + return &mapProvisioner{ + managers: managers, + } +} + +func (p *mapProvisioner) Initialize(ctx context.Context) error { + for typ, mgr := range p.managers { + init, ok := mgr.(Initializer) + if !ok { + continue + } + + if err := init.Initialize(ctx); err != nil { + return errors.Wrapf(err, "failed to run initializers for workload type '%s'", typ) + } + } + + return nil +} + +// Provision implements provision.Provisioner +func (p *mapProvisioner) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (result gridtypes.Result, err error) { + manager, ok := p.managers[wl.Type] + if !ok { + return result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID) + } + + data, err := manager.Provision(ctx, wl) + if errors.Is(err, ErrNoActionNeeded) { + return result, err + } + + return buildResult(data, err) +} + +// Decommission implementation for provision.Provisioner +func (p *mapProvisioner) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + manager, ok := p.managers[wl.Type] + if !ok { + return fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID) + } + + return manager.Deprovision(ctx, wl) +} + +// Pause a workload +func (p *mapProvisioner) Pause(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) { + if wl.Result.State != gridtypes.StateOk { + return wl.Result, fmt.Errorf("can only pause workloads in ok state") + } + + manager, ok := p.managers[wl.Type] + if !ok { + return wl.Result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID) + } + + // change all status to Paused + var err error = Paused() + // unless there is specific implementation to + // pause a work load, we call it. + mgr, ok := manager.(Pauser) + if ok { + err = mgr.Pause(ctx, wl) + } + + // update the result object. this way we make sure data + // does not change across pause/resume changes + result := wl.Result + setState(&result, err) + return result, nil +} + +// Resume a workload +func (p *mapProvisioner) Resume(ctx context.Context, wl *gridtypes.WorkloadWithID) (gridtypes.Result, error) { + if wl.Result.State != gridtypes.StatePaused { + return wl.Result, fmt.Errorf("can only resume workloads in paused state") + } + + manager, ok := p.managers[wl.Type] + if !ok { + return wl.Result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID) + } + // change all status to Paused + var err error = Ok() + // unless there is specific implementation to + // pause a work load, we call it. + mgr, ok := manager.(Pauser) + if ok { + err = mgr.Resume(ctx, wl) + } + + // update the result object. 
this way we make sure data + // does not change across pause/resume changes + result := wl.Result + setState(&result, err) + return result, nil +} + +// Provision implements provision.Provisioner +func (p *mapProvisioner) Update(ctx context.Context, wl *gridtypes.WorkloadWithID) (result gridtypes.Result, err error) { + manager, ok := p.managers[wl.Type] + if !ok { + return result, fmt.Errorf("unknown workload type '%s' for reservation id '%s'", wl.Type, wl.ID) + } + + updater, ok := manager.(Updater) + if !ok { + return result, fmt.Errorf("workload type '%s' does not support updating", wl.Type) + } + + data, err := updater.Update(ctx, wl) + if errors.Is(err, ErrNoActionNeeded) { + return result, err + } + + return buildResult(data, err) +} + +func (p *mapProvisioner) CanUpdate(ctx context.Context, typ gridtypes.WorkloadType) bool { + manager, ok := p.managers[typ] + if !ok { + return false + } + + _, ok = manager.(Updater) + return ok +} + +func setState(result *gridtypes.Result, err error) { + result.Created = gridtypes.Now() + state := gridtypes.StateOk + str := "" + + if err != nil { + str = err.Error() + state = gridtypes.StateError + + var resp *response + if errors.As(err, &resp) { + state = resp.state() + } + } + + result.State = state + result.Error = str +} + +func buildResult(data interface{}, err error) (gridtypes.Result, error) { + var result gridtypes.Result + setState(&result, err) + + br, err := json.Marshal(data) + if err != nil { + return result, errors.Wrap(err, "failed to encode result") + } + + result.Data = br + + return result, nil +} diff --git a/pkg/provision/provisioner_test.go b/pkg/provision/provisioner_test.go new file mode 100644 index 00000000..618dfb41 --- /dev/null +++ b/pkg/provision/provisioner_test.go @@ -0,0 +1,152 @@ +package provision + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + "github.com/pkg/errors" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +func TestBuildResult(t *testing.T) { + type Case struct { + in error + out gridtypes.Result + } + cases := []Case{ + { + in: nil, + out: gridtypes.Result{ + State: gridtypes.StateOk, + Error: "", + }, + }, + { + in: Ok(), + out: gridtypes.Result{ + State: gridtypes.StateOk, + Error: "", + }, + }, + { + in: fmt.Errorf("something went wrong"), + out: gridtypes.Result{ + State: gridtypes.StateError, + Error: "something went wrong", + }, + }, + { + in: UnChanged(fmt.Errorf("failed to update")), + out: gridtypes.Result{ + State: gridtypes.StateUnChanged, + Error: "failed to update", + }, + }, + { + in: errors.Wrap(UnChanged(fmt.Errorf("failed to update")), "wrapped"), + out: gridtypes.Result{ + State: gridtypes.StateUnChanged, + Error: "wrapped: failed to update", + }, + }, + { + in: Paused(), + out: gridtypes.Result{ + State: gridtypes.StatePaused, + Error: "paused", + }, + }, + { + in: errors.Wrap(Paused(), "wrapped for some reason"), + out: gridtypes.Result{ + State: gridtypes.StatePaused, + Error: "wrapped for some reason: paused", + }, + }, + } + + for _, c := range cases { + t.Run("", func(t *testing.T) { + result, err := buildResult(nil, c.in) + require.NoError(t, err) + + require.Equal(t, c.out.State, result.State) + require.Equal(t, c.out.Error, result.Error) + }) + } +} + +var ( + testWorkloadType gridtypes.WorkloadType = "test" +) + +type testManagerFull struct { + mock.Mock +} + +func (t *testManagerFull) Provision(ctx context.Context, wl *gridtypes.WorkloadWithID) (interface{}, error) { + args 
:= t.Called(ctx, wl) + return args.Get(0), args.Error(1) +} + +func (t *testManagerFull) Deprovision(ctx context.Context, wl *gridtypes.WorkloadWithID) error { + args := t.Called(ctx, wl) + return args.Error(0) +} + +func TestProvision(t *testing.T) { + require := require.New(t) + var mgr testManagerFull + provisioner := NewMapProvisioner(map[gridtypes.WorkloadType]Manager{ + testWorkloadType: &mgr, + }) + + ctx := context.Background() + wl := gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{ + Type: testWorkloadType, + }, + } + + mgr.On("Provision", mock.Anything, &wl).Return(123, nil) + result, err := provisioner.Provision(ctx, &wl) + + require.NoError(err) + require.Equal(gridtypes.StateOk, result.State) + require.Equal(json.RawMessage("123"), result.Data) + + mgr.ExpectedCalls = nil + mgr.On("Provision", mock.Anything, &wl).Return(nil, fmt.Errorf("failed to run")) + result, err = provisioner.Provision(ctx, &wl) + + require.NoError(err) + require.Equal(gridtypes.StateError, result.State) + require.Equal("failed to run", result.Error) + + mgr.ExpectedCalls = nil + mgr.On("Pause", mock.Anything, &wl).Return(nil, nil) + result, err = provisioner.Pause(ctx, &wl) + + require.Errorf(err, "can only pause workloads in ok state") + + mgr.ExpectedCalls = nil + wl = gridtypes.WorkloadWithID{ + Workload: &gridtypes.Workload{ + Type: testWorkloadType, + Result: gridtypes.Result{ + State: gridtypes.StateOk, + }, + }, + } + + // not here paused will set the right state even if manager + // does not support this state. + mgr.On("Pause", mock.Anything, &wl).Return(nil, nil) + result, err = provisioner.Pause(ctx, &wl) + require.NoError(err) + require.Equal(gridtypes.StatePaused, result.State) +} diff --git a/pkg/provision/resource_units.go b/pkg/provision/resource_units.go new file mode 100644 index 00000000..e1779546 --- /dev/null +++ b/pkg/provision/resource_units.go @@ -0,0 +1,13 @@ +package provision + +// ResourceUnits type +type ResourceUnits string + +// ResourcesUnits are the units used to compute how much +// capacity is reserved on the system +var ( + ResourceUnitsCRU = ResourceUnits("CRU") + ResourceUnitsMRU = ResourceUnits("MRU") + ResourceUnitsHRU = ResourceUnits("HRU") + ResourceUnitsSRU = ResourceUnits("SRU") +) diff --git a/pkg/provision/storage.fs/shared.go b/pkg/provision/storage.fs/shared.go new file mode 100644 index 00000000..da0c4cea --- /dev/null +++ b/pkg/provision/storage.fs/shared.go @@ -0,0 +1,101 @@ +package storage + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +// Shared, returns a workload id that reference the workload with that +// name. The workload type must be of a `Shared` type. +// A Shared workload type means that the workload (of that type) can be +// accessed by other deployments for the same twin. 
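// For instance, a sharable workload named "network" created under one contract
// can be referenced by name from a later deployment of the same twin instead of
// being redefined (the set of sharable types is whatever was registered through
// gridtypes.RegisterSharableType).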
A shared workload +// should be only updatable only via the deployment that creates it +func (s *Fs) GetShared(twinID uint32, name gridtypes.Name) (gridtypes.WorkloadID, error) { + s.m.RLock() + defer s.m.RUnlock() + + return s.shared(twinID, name) +} + +func (s *Fs) SharedByTwin(twinID uint32) ([]gridtypes.WorkloadID, error) { + s.m.RLock() + defer s.m.RUnlock() + + return s.sharedByTwin(twinID) +} + +func (s *Fs) sharedByTwin(twinID uint32) ([]gridtypes.WorkloadID, error) { + root := filepath.Join(s.root, sharedSubDir, fmt.Sprint(twinID)) + infos, err := os.ReadDir(root) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, errors.Wrap(err, "failed to list shared user workloads") + } + var ids []gridtypes.WorkloadID + for _, entry := range infos { + info, err := entry.Info() + if err != nil { + return nil, err + } + if info.Mode().Type() != fs.ModeSymlink { + log.Warn(). + Uint32("twin", twinID). + Str("name", info.Name()). + Msg("found non symlink file in twin shared workloads") + continue + } + + id, err := s.shared(twinID, gridtypes.Name(info.Name())) + if err != nil { + return nil, err + } + + ids = append(ids, id) + } + + return ids, nil +} +func (s *Fs) shared(twinID uint32, name gridtypes.Name) (id gridtypes.WorkloadID, err error) { + link := s.rooted(s.sharedLinkPath(twinID, name)) + target, err := os.Readlink(link) + if err != nil { + return id, errors.Wrapf(err, "failed to read link to deployment from '%s'", link) + } + // target has base name as the 'contract id' + dl, err := strconv.ParseUint(filepath.Base(target), 10, 32) + if err != nil { + return id, errors.Wrapf(err, "invalid link '%s' to target '%s'", link, target) + } + + return gridtypes.NewUncheckedWorkloadID(twinID, dl, name), nil +} + +func (s *Fs) sharedCreate(d *gridtypes.Deployment, name gridtypes.Name) error { + target := s.rooted(s.deploymentPath(d)) + src := s.rooted(s.sharedLinkPath(d.TwinID, name)) + + dir := filepath.Dir(src) + if err := os.MkdirAll(dir, 0755); err != nil { + return errors.Wrap(err, "failed to create shared twin directory") + } + + target, err := filepath.Rel(dir, target) + if err != nil { + return err + } + + return os.Symlink(target, src) +} + +func (s *Fs) sharedDelete(d *gridtypes.Deployment, name gridtypes.Name) error { + src := s.rooted(s.sharedLinkPath(d.TwinID, name)) + return os.Remove(src) +} diff --git a/pkg/provision/storage.fs/storage.go b/pkg/provision/storage.fs/storage.go new file mode 100644 index 00000000..d67af209 --- /dev/null +++ b/pkg/provision/storage.fs/storage.go @@ -0,0 +1,371 @@ +package storage + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strconv" + "sync" + + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/versioned" +) + +var ( + // deploymentSchemaV1 reservation schema version 1 + deploymentSchemaV1 = versioned.MustParse("1.0.0") + // ReservationSchemaLastVersion link to latest version + deploymentSchemaLastVersion = deploymentSchemaV1 + + sharedSubDir = "shared" +) + +// Fs is a in reservation cache using the filesystem as backend +type Fs struct { + m sync.RWMutex + root string +} + +// NewFSStore creates a in memory reservation store +func NewFSStore(root string) (*Fs, error) { + store := &Fs{ + root: root, + } + + for _, dir := range []string{sharedSubDir} { + if err := os.MkdirAll(filepath.Join(root, dir), 0770); err != nil { + return nil, err + } + } + + return 
store, nil +} + +func (s *Fs) sharedLinkPath(twinID uint32, name gridtypes.Name) string { + return filepath.Join(sharedSubDir, fmt.Sprint(twinID), string(name)) +} + +func (s *Fs) deploymentPath(d *gridtypes.Deployment) string { + return filepath.Join(fmt.Sprint(d.TwinID), fmt.Sprint(d.ContractID)) +} + +func (s *Fs) rooted(p ...string) string { + return filepath.Join(s.root, filepath.Join(p...)) +} + +// Delete is only used for the migration +func (s *Fs) Delete(d gridtypes.Deployment) error { + s.m.Lock() + defer s.m.Unlock() + + path := s.rooted(s.deploymentPath(&d)) + return os.RemoveAll(path) +} + +// Add workload to database +func (s *Fs) Add(d gridtypes.Deployment) error { + s.m.Lock() + defer s.m.Unlock() + + path := s.rooted(s.deploymentPath(&d)) + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return errors.Wrap(err, "failed to crate directory") + } + + // make sure that this deployment does not actually + // redefine a "sharable" workload. + for _, wl := range d.GetShareables() { + conflict, err := s.shared(d.TwinID, wl.Name) + if err == nil { + return errors.Wrapf( + provision.ErrDeploymentConflict, + "sharable workload '%s' is conflicting with another workload '%s'", + string(wl.Name), conflict) + } else if err != nil && !errors.Is(err, os.ErrNotExist) { + return errors.Wrap(err, "failed to check conflicts") + } + } + + file, err := os.OpenFile( + path, + os.O_CREATE|os.O_WRONLY|os.O_EXCL, + 0644, + ) + + if os.IsExist(err) { + return errors.Wrapf(provision.ErrDeploymentExists, "object '%d' exist", d.ContractID) + } else if err != nil { + return errors.Wrap(err, "failed to open workload file") + } + + defer file.Close() + writer, err := versioned.NewWriter(file, deploymentSchemaLastVersion) + if err != nil { + return errors.Wrap(err, "failed to create versioned writer") + } + + if err := json.NewEncoder(writer).Encode(d); err != nil { + return errors.Wrap(err, "failed to write workload data") + } + + // make sure that this deployment does not actually + // redefine a "sharable" workload. + for _, wl := range d.GetShareables() { + if err := s.sharedCreate(&d, wl.Name); err != nil { + return errors.Wrap(err, "failed to store sharable workloads") + } + } + + return nil +} + +// Set updates value of a workload, the reservation must exists +// otherwise an error is returned +func (s *Fs) Set(dl gridtypes.Deployment) error { + s.m.Lock() + defer s.m.Unlock() + + sharedIDs, err := s.sharedByTwin(dl.TwinID) + if err != nil { + return errors.Wrap(err, "failed to get all sharable user workloads") + } + + this := map[gridtypes.Name]gridtypes.WorkloadID{} + taken := map[gridtypes.Name]gridtypes.WorkloadID{} + for _, shared := range sharedIDs { + _, contract, name, _ := shared.Parts() + if contract == dl.ContractID { + this[name] = shared + } else { + taken[name] = shared + } + } + + // does this workload defines a new sharable workload. In that case + // we need to make sure that this does not conflict with the current + // set of twin sharable workloads. 
but should not conflict with itself + for _, wl := range dl.GetShareables() { + if conflict, ok := taken[wl.Name]; ok { + return errors.Wrapf( + provision.ErrDeploymentConflict, + "sharable workload '%s' is conflicting with another workload '%s'", + string(wl.Name), conflict) + } + } + + path := s.rooted(s.deploymentPath(&dl)) + file, err := os.OpenFile( + path, + os.O_WRONLY|os.O_TRUNC, + 0644, + ) + if os.IsNotExist(err) { + return errors.Wrapf(provision.ErrDeploymentNotExists, "deployment '%d:%d' does not exist", dl.TwinID, dl.ContractID) + } else if err != nil { + return errors.Wrap(err, "failed to open workload file") + } + defer file.Close() + writer, err := versioned.NewWriter(file, deploymentSchemaLastVersion) + if err != nil { + return errors.Wrap(err, "failed to create versioned writer") + } + + if err := json.NewEncoder(writer).Encode(dl); err != nil { + return errors.Wrap(err, "failed to write workload data") + } + + // now we make sure that all sharable (and active) workloads + // on this deployment is referenced correctly + var tolink []gridtypes.Name + for _, wl := range dl.GetShareables() { + // if workload result is not set yet. or if the state is OK + // it means the workload still need to be treated as shared object + if wl.Result.IsNil() || wl.Result.State.IsOkay() { + // workload with no results, so we should keep the link + if _, ok := this[wl.Name]; ok { + // avoid unlinking + delete(this, wl.Name) + } else { + // or if new, add to tolink + tolink = append(tolink, wl.Name) + } + } + // if result state is set to anything else (deleted, or error) + // we then leave it in the `this` map which means they + // get to be unlinked. so the name can be used again by the same twin + } + // we either removed the names that should be kept (state = ok or no result yet) + // and new ones have been added to tolink. so it's safe to clean up all links + // in this. + for name := range this { + if err := s.sharedDelete(&dl, name); err != nil { + log.Error().Err(err). + Uint64("contract", dl.ContractID). + Stringer("name", name). 
+ Msg("failed to clean up shared workload '%d.%s'") + } + } + + for _, new := range tolink { + if err := s.sharedCreate(&dl, new); err != nil { + return err + } + } + + return nil +} + +// Get gets a workload by id +func (s *Fs) get(path string) (gridtypes.Deployment, error) { + var wl gridtypes.Deployment + file, err := os.Open(path) + if os.IsNotExist(err) { + return wl, errors.Wrapf(provision.ErrDeploymentNotExists, "deployment '%s' does not exist", path) + } else if err != nil { + return wl, errors.Wrap(err, "failed to open workload file") + } + defer file.Close() + reader, err := versioned.NewReader(file) + if err != nil { + return wl, errors.Wrap(err, "failed to load workload") + } + version := reader.Version() + if !version.EQ(deploymentSchemaV1) { + return wl, fmt.Errorf("invalid workload version") + } + + if err := json.NewDecoder(reader).Decode(&wl); err != nil { + return wl, errors.Wrap(err, "failed to read workload data") + } + + return wl, nil +} + +// Get gets a workload by id +func (s *Fs) Get(twin uint32, deployment uint64) (gridtypes.Deployment, error) { + s.m.RLock() + defer s.m.RUnlock() + + path := s.rooted(filepath.Join(fmt.Sprint(twin), fmt.Sprint(deployment))) + + return s.get(path) +} + +// ByTwin return list of deployment for given twin id +func (s *Fs) ByTwin(twin uint32) ([]uint64, error) { + s.m.RLock() + defer s.m.RUnlock() + return s.byTwin(twin) +} + +func (s *Fs) byTwin(twin uint32) ([]uint64, error) { + base := filepath.Join(s.root, fmt.Sprint(twin)) + + entities, err := os.ReadDir(base) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, errors.Wrap(err, "failed to list twin directory") + } + ids := make([]uint64, 0, len(entities)) + for _, entry := range entities { + if entry.IsDir() { + continue + } + + id, err := strconv.ParseUint(entry.Name(), 10, 32) + if err != nil { + log.Error().Str("name", entry.Name()).Err(err).Msg("invalid deployment id file") + continue + } + + ids = append(ids, id) + } + + return ids, nil +} + +// Twins lists available users +func (s *Fs) Twins() ([]uint32, error) { + s.m.RLock() + defer s.m.RUnlock() + + return s.twins() +} + +func (s *Fs) twins() ([]uint32, error) { + entities, err := os.ReadDir(s.root) + if os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, errors.Wrap(err, "failed to list twins directory") + } + ids := make([]uint32, 0, len(entities)) + for _, entry := range entities { + if !entry.IsDir() || entry.Name() == sharedSubDir { + continue + } + + id, err := strconv.ParseUint(entry.Name(), 10, 32) + if err != nil { + log.Error().Str("name", entry.Name()).Err(err).Msg("invalid twin id directory, removing") + os.RemoveAll(filepath.Join(s.root, entry.Name())) + continue + } + + ids = append(ids, uint32(id)) + } + + return ids, nil +} + +// Capacity returns the total capacity of all deployments +// that are in OK state. 
+func (s *Fs) Capacity() (cap gridtypes.Capacity, err error) { + s.m.RLock() + defer s.m.RUnlock() + + twins, err := s.twins() + if err != nil { + return cap, err + } + + for _, twin := range twins { + ids, err := s.byTwin(twin) + if err != nil { + return cap, err + } + + for _, id := range ids { + p := s.rooted(fmt.Sprint(twin), fmt.Sprint(id)) + deployment, err := s.get(p) + if err != nil { + return cap, err + } + + for _, wl := range deployment.Workloads { + if !wl.Result.State.IsOkay() { + continue + } + + c, err := wl.Capacity() + if err != nil { + return cap, err + } + + cap.Add(&c) + } + } + } + + return +} + +// Close makes sure the backend of the store is closed properly +func (s *Fs) Close() error { + return nil +} diff --git a/pkg/provision/storage.fs/storage_test.go b/pkg/provision/storage.fs/storage_test.go new file mode 100644 index 00000000..2b48d8e1 --- /dev/null +++ b/pkg/provision/storage.fs/storage_test.go @@ -0,0 +1,387 @@ +package storage + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +var ( + TestType = gridtypes.WorkloadType("test") + TestSharableType = gridtypes.WorkloadType("sharable") +) + +type TestData struct{} + +func (t TestData) Valid(getter gridtypes.WorkloadGetter) error { + return nil +} + +func (t TestData) Challenge(w io.Writer) error { + return nil +} + +func (t TestData) Capacity() (gridtypes.Capacity, error) { + return gridtypes.Capacity{}, nil +} + +func init() { + gridtypes.RegisterType(TestType, TestData{}) + gridtypes.RegisterSharableType(TestSharableType, TestData{}) +} + +func TestStorageAdd(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + twin := uint32(1) + id := uint64(1) + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.NoError(err) + stat, err := os.Lstat(filepath.Join(root, fmt.Sprint(twin), fmt.Sprint(id))) + require.NoError(err) + require.True(stat.Mode().IsRegular()) +} + +func TestStorageAddSharable(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + twin := uint32(1) + id := uint64(1) + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + { + Name: "shared", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.NoError(err) + stat, err := os.Lstat(filepath.Join(root, fmt.Sprint(twin), fmt.Sprint(id))) + require.NoError(err) + require.True(stat.Mode().IsRegular()) + + shared, err := store.SharedByTwin(twin) + require.NoError(err) + require.Len(shared, 1) + require.Equal(gridtypes.NewUncheckedWorkloadID(twin, 1, "shared"), shared[0]) +} + +func TestStorageAddConflictingSharable(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + twin := uint32(1) + id := uint64(1) + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + 
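		// the first deployment claims the sharable name "shared"; the second Add
		// below reuses that name and must fail with ErrDeploymentConflict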
Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + { + Name: "shared", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.NoError(err) + + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: 2, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "shared", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.Error(err) + require.True(errors.Is(err, provision.ErrDeploymentConflict)) + + wlID, err := store.GetShared(twin, "shared") + require.NoError(err) + require.Equal(gridtypes.NewUncheckedWorkloadID(twin, id, "shared"), wlID) +} + +func TestStorageSetSharable(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + twin := uint32(1) + id := uint64(1) + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "shared", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.NoError(err) + + shared, err := store.SharedByTwin(twin) + require.NoError(err) + require.Len(shared, 1) + require.Equal(gridtypes.NewUncheckedWorkloadID(twin, 1, "shared"), shared[0]) + + err = store.Set(gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "shared", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateDeleted, + }, + }, + { + Name: "new", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateOk, + }, + }, + { + Name: "errord", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateError, + }, + }, + }, + }) + + require.NoError(err) + + shared, err = store.SharedByTwin(twin) + require.NoError(err) + require.Len(shared, 1) + require.Equal(gridtypes.NewUncheckedWorkloadID(twin, 1, "new"), shared[0]) + + err = store.Add(gridtypes.Deployment{ + TwinID: twin, + ContractID: 2, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "new", + Type: TestSharableType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + }) + + require.Error(err) + require.True(errors.Is(err, provision.ErrDeploymentConflict)) + + wlID, err := store.GetShared(twin, "new") + require.NoError(err) + require.Equal(gridtypes.NewUncheckedWorkloadID(twin, id, "new"), wlID) +} + +func TestStorageSet(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + twin := uint32(1) + id := uint64(1) + deployment := gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + } + + err = store.Set(deployment) + + require.Error(err) + require.True(errors.Is(err, provision.ErrDeploymentNotExists)) + + err = store.Add(deployment) + require.NoError(err) + + err = store.Set(deployment) + require.NoError(err) +} + +func TestStorageGet(t *testing.T) { + 
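	// round-trip check: store a deployment, read it back, and make sure the
	// metadata, description and workload count survive unchanged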
require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + twin := uint32(1) + id := uint64(1) + deployment := gridtypes.Deployment{ + TwinID: twin, + ContractID: id, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + } + + err = store.Add(deployment) + require.NoError(err) + + loaded, err := store.Get(deployment.TwinID, deployment.ContractID) + require.NoError(err) + require.Equal(deployment.Description, loaded.Description) + require.Equal(deployment.Metadata, loaded.Metadata) + require.Equal(len(deployment.Workloads), len(deployment.Workloads)) +} + +func TestStorageByTwin(t *testing.T) { + require := require.New(t) + root := t.TempDir() + + store, err := NewFSStore(root) + require.NoError(err) + + deployment1 := gridtypes.Deployment{ + TwinID: 1, + ContractID: 1, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + } + + err = store.Add(deployment1) + require.NoError(err) + + deployment2 := gridtypes.Deployment{ + TwinID: 1, + ContractID: 2, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + } + + err = store.Add(deployment2) + require.NoError(err) + + deployment3 := gridtypes.Deployment{ + TwinID: 2, + ContractID: 1, + Metadata: "meta", + Description: "descriptions", + Workloads: []gridtypes.Workload{ + { + Name: "volume", + Type: TestType, + Data: gridtypes.MustMarshal(TestData{}), + }, + }, + } + + err = store.Add(deployment3) + require.NoError(err) + + ids, err := store.ByTwin(1) + require.NoError(err) + require.Len(ids, 2) + + ids, err = store.ByTwin(2) + require.NoError(err) + require.Len(ids, 1) +} diff --git a/pkg/provision/storage/storage.go b/pkg/provision/storage/storage.go new file mode 100644 index 00000000..aa3742dc --- /dev/null +++ b/pkg/provision/storage/storage.go @@ -0,0 +1,780 @@ +package storage + +import ( + "encoding/binary" + "encoding/json" + "fmt" + + "github.com/boltdb/bolt" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +var ( + ErrTransactionNotExist = fmt.Errorf("no transaction found") + ErrInvalidWorkloadType = fmt.Errorf("invalid workload type") +) + +const ( + keyVersion = "version" + keyMetadata = "metadata" + keyDescription = "description" + keySignatureRequirement = "signature_requirement" + keyWorkloads = "workloads" + keyTransactions = "transactions" + keyGlobal = "global" +) + +type MigrationStorage struct { + unsafe BoltStorage +} + +type BoltStorage struct { + db *bolt.DB + unsafe bool +} + +var _ provision.Storage = (*BoltStorage)(nil) + +func New(path string) (*BoltStorage, error) { + db, err := bolt.Open(path, 0644, bolt.DefaultOptions) + if err != nil { + return nil, err + } + + return &BoltStorage{ + db, false, + }, nil +} + +func (b BoltStorage) Migration() MigrationStorage { + b.unsafe = true + return MigrationStorage{unsafe: b} +} + +func (b *BoltStorage) u32(u uint32) []byte { + var v [4]byte + binary.BigEndian.PutUint32(v[:], u) + return v[:] +} + +func (b *BoltStorage) l32(v []byte) uint32 { + return binary.BigEndian.Uint32(v) +} + +func (b *BoltStorage) u64(u uint64) []byte { + var v [8]byte + 
binary.BigEndian.PutUint64(v[:], u) + return v[:] +} + +func (b *BoltStorage) l64(v []byte) uint64 { + return binary.BigEndian.Uint64(v) +} + +func (b *BoltStorage) Create(deployment gridtypes.Deployment) error { + return b.db.Update(func(tx *bolt.Tx) error { + twin, err := tx.CreateBucketIfNotExists(b.u32(deployment.TwinID)) + if err != nil { + return errors.Wrap(err, "failed to create twin") + } + dl, err := twin.CreateBucket(b.u64(deployment.ContractID)) + if errors.Is(err, bolt.ErrBucketExists) { + return provision.ErrDeploymentExists + } else if err != nil { + return errors.Wrap(err, "failed to create deployment") + } + + if err := dl.Put([]byte(keyVersion), b.u32(deployment.Version)); err != nil { + return err + } + if err := dl.Put([]byte(keyDescription), []byte(deployment.Description)); err != nil { + return err + } + if err := dl.Put([]byte(keyMetadata), []byte(deployment.Metadata)); err != nil { + return err + } + sig, err := json.Marshal(deployment.SignatureRequirement) + if err != nil { + return errors.Wrap(err, "failed to encode signature requirement") + } + if err := dl.Put([]byte(keySignatureRequirement), sig); err != nil { + return err + } + + for _, wl := range deployment.Workloads { + if err := b.add(tx, deployment.TwinID, deployment.ContractID, wl); err != nil { + return err + } + } + return nil + }) +} + +func (b *BoltStorage) Update(twin uint32, deployment uint64, field ...provision.Field) error { + return b.db.Update(func(t *bolt.Tx) error { + twin := t.Bucket(b.u32(twin)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(deployment)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + for _, field := range field { + var key, value []byte + switch f := field.(type) { + case provision.VersionField: + key = []byte(keyVersion) + value = b.u32(f.Version) + case provision.MetadataField: + key = []byte(keyMetadata) + value = []byte(f.Metadata) + case provision.DescriptionField: + key = []byte(keyDescription) + value = []byte(f.Description) + case provision.SignatureRequirementField: + key = []byte(keySignatureRequirement) + var err error + value, err = json.Marshal(f.SignatureRequirement) + if err != nil { + return errors.Wrap(err, "failed to serialize signature requirements") + } + default: + return fmt.Errorf("unknown field") + } + + if err := deployment.Put(key, value); err != nil { + return errors.Wrapf(err, "failed to update deployment") + } + } + + return nil + }) +} + +// Migrate deployment creates an exact copy of dl in this storage. 
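// Because it goes through the storage in unsafe mode, the usual duplicate-name
// check for sharable workloads is skipped, so already-linked sharable names do
// not make the migration fail. A minimal use, mirroring the tests in this patch:
//
//	migration := db.Migration()
//	err := migration.Migrate(oldDeployment)
//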
+// usually used to copy deployment from older storage +func (b *MigrationStorage) Migrate(dl gridtypes.Deployment) error { + err := b.unsafe.Create(dl) + if errors.Is(err, provision.ErrDeploymentExists) { + log.Debug().Uint32("twin", dl.TwinID).Uint64("deployment", dl.ContractID).Msg("deployment already migrated") + return nil + } else if err != nil { + return err + } + + for _, wl := range dl.Workloads { + if err := b.unsafe.Transaction(dl.TwinID, dl.ContractID, wl); err != nil { + return err + } + if wl.Result.State == gridtypes.StateDeleted { + if err := b.unsafe.Remove(dl.TwinID, dl.ContractID, wl.Name); err != nil { + return err + } + } + } + + return nil +} + +func (b *BoltStorage) Delete(twin uint32, deployment uint64) error { + return b.db.Update(func(t *bolt.Tx) error { + bucket := t.Bucket(b.u32(twin)) + if bucket == nil { + return nil + } + + if err := bucket.DeleteBucket(b.u64(deployment)); err != nil && !errors.Is(err, bolt.ErrBucketNotFound) { + return err + } + // if the twin now is empty then we can also delete the twin + curser := bucket.Cursor() + found := false + for k, v := curser.First(); k != nil; k, v = curser.Next() { + if v != nil { + // checking that it is a bucket + continue + } + + if len(k) != 8 || string(k) == "global" { + // sanity check it's a valid uint32 + continue + } + + found = true + break + } + + if !found { + // empty bucket + return t.DeleteBucket(b.u32(twin)) + } + + return nil + }) +} + +func (b *BoltStorage) Get(twin uint32, deployment uint64) (dl gridtypes.Deployment, err error) { + dl.TwinID = twin + dl.ContractID = deployment + err = b.db.View(func(t *bolt.Tx) error { + twin := t.Bucket(b.u32(twin)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(deployment)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + if value := deployment.Get([]byte(keyVersion)); value != nil { + dl.Version = b.l32(value) + } + if value := deployment.Get([]byte(keyDescription)); value != nil { + dl.Description = string(value) + } + if value := deployment.Get([]byte(keyMetadata)); value != nil { + dl.Metadata = string(value) + } + if value := deployment.Get([]byte(keySignatureRequirement)); value != nil { + if err := json.Unmarshal(value, &dl.SignatureRequirement); err != nil { + return err + } + } + return nil + }) + if err != nil { + return dl, err + } + + dl.Workloads, err = b.workloads(twin, deployment) + return +} + +func (b *BoltStorage) Error(twinID uint32, dl uint64, e error) error { + current, err := b.Get(twinID, dl) + if err != nil { + return err + } + return b.db.Update(func(t *bolt.Tx) error { + twin := t.Bucket(b.u32(twinID)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(dl)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + result := gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateError, + Error: e.Error(), + } + for _, wl := range current.Workloads { + if err := b.transaction(t, twinID, dl, wl.WithResults(result)); err != nil { + return err + } + } + return nil + }) +} + +func (b *BoltStorage) add(tx *bolt.Tx, twinID uint32, dl uint64, workload gridtypes.Workload) error { + global := gridtypes.IsSharable(workload.Type) + twin := tx.Bucket(b.u32(twinID)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + 
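	// sharable workloads are additionally indexed by name in a twin-level
	// "global" bucket; that index is how name conflicts across deployments of
	// the same twin are detected (and skipped in unsafe/migration mode)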
+ if global { + shared, err := twin.CreateBucketIfNotExists([]byte(keyGlobal)) + if err != nil { + return errors.Wrap(err, "failed to create twin global bucket") + } + + if !b.unsafe { + if value := shared.Get([]byte(workload.Name)); value != nil { + return errors.Wrapf( + provision.ErrDeploymentConflict, "global workload with the same name '%s' exists", workload.Name) + } + } + + if err := shared.Put([]byte(workload.Name), b.u64(dl)); err != nil { + return err + } + } + + deployment := twin.Bucket(b.u64(dl)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + workloads, err := deployment.CreateBucketIfNotExists([]byte(keyWorkloads)) + if err != nil { + return errors.Wrap(err, "failed to prepare workloads storage") + } + + if value := workloads.Get([]byte(workload.Name)); value != nil { + return errors.Wrap(provision.ErrWorkloadExists, "workload with same name already exists in deployment") + } + + if err := workloads.Put([]byte(workload.Name), []byte(workload.Type.String())); err != nil { + return err + } + + return b.transaction(tx, twinID, dl, + workload.WithResults(gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateInit, + }), + ) +} + +func (b *BoltStorage) Add(twin uint32, deployment uint64, workload gridtypes.Workload) error { + return b.db.Update(func(tx *bolt.Tx) error { + return b.add(tx, twin, deployment, workload) + }) +} + +func (b *BoltStorage) Remove(twin uint32, deployment uint64, name gridtypes.Name) error { + return b.db.Update(func(tx *bolt.Tx) error { + twin := tx.Bucket(b.u32(twin)) + if twin == nil { + return nil + } + + deployment := twin.Bucket(b.u64(deployment)) + if deployment == nil { + return nil + } + + workloads := deployment.Bucket([]byte(keyWorkloads)) + if workloads == nil { + return nil + } + + typ := workloads.Get([]byte(name)) + if typ == nil { + return nil + } + + if gridtypes.IsSharable(gridtypes.WorkloadType(typ)) { + if shared := twin.Bucket([]byte(keyGlobal)); shared != nil { + if err := shared.Delete([]byte(name)); err != nil { + return err + } + } + } + + return workloads.Delete([]byte(name)) + }) +} + +func (b *BoltStorage) transaction(tx *bolt.Tx, twinID uint32, dl uint64, workload gridtypes.Workload) error { + if err := workload.Result.Valid(); err != nil { + return errors.Wrap(err, "failed to validate workload result") + } + + data, err := json.Marshal(workload) + if err != nil { + return errors.Wrap(err, "failed to encode workload data") + } + + twin := tx.Bucket(b.u32(twinID)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(dl)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + workloads := deployment.Bucket([]byte(keyWorkloads)) + if workloads == nil { + return errors.Wrap(provision.ErrWorkloadNotExist, "deployment has no active workloads") + } + + typRaw := workloads.Get([]byte(workload.Name)) + if typRaw == nil { + return errors.Wrap(provision.ErrWorkloadNotExist, "workload does not exist") + } + + if workload.Type != gridtypes.WorkloadType(typRaw) { + return errors.Wrapf(ErrInvalidWorkloadType, "invalid workload type, expecting '%s'", string(typRaw)) + } + + logs, err := deployment.CreateBucketIfNotExists([]byte(keyTransactions)) + if err != nil { + return errors.Wrap(err, "failed to prepare deployment transaction logs") + } + + id, err := logs.NextSequence() + if err != nil { + return err + } + + return 
logs.Put(b.u64(id), data) +} + +func (b *BoltStorage) changes(tx *bolt.Tx, twinID uint32, dl uint64) ([]gridtypes.Workload, error) { + twin := tx.Bucket(b.u32(twinID)) + if twin == nil { + return nil, errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(dl)) + if deployment == nil { + return nil, errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + logs := deployment.Bucket([]byte(keyTransactions)) + if logs == nil { + return nil, nil + } + var changes []gridtypes.Workload + err := logs.ForEach(func(k, v []byte) error { + if len(v) == 0 { + return nil + } + + var wl gridtypes.Workload + if err := json.Unmarshal(v, &wl); err != nil { + return errors.Wrap(err, "failed to load transaction log") + } + + changes = append(changes, wl) + return nil + }) + + return changes, err +} + +func (b *BoltStorage) Transaction(twin uint32, deployment uint64, workload gridtypes.Workload) error { + return b.db.Update(func(tx *bolt.Tx) error { + return b.transaction(tx, twin, deployment, workload) + }) +} + +func (b *BoltStorage) Changes(twin uint32, deployment uint64) (changes []gridtypes.Workload, err error) { + err = b.db.View(func(tx *bolt.Tx) error { + changes, err = b.changes(tx, twin, deployment) + return err + }) + + return +} + +func (b *BoltStorage) workloads(twin uint32, deployment uint64) ([]gridtypes.Workload, error) { + names := make(map[gridtypes.Name]gridtypes.WorkloadType) + workloads := make(map[gridtypes.Name]gridtypes.Workload) + + err := b.db.View(func(tx *bolt.Tx) error { + twin := tx.Bucket(b.u32(twin)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(deployment)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + types := deployment.Bucket([]byte(keyWorkloads)) + if types == nil { + // no active workloads + return nil + } + + err := types.ForEach(func(k, v []byte) error { + names[gridtypes.Name(k)] = gridtypes.WorkloadType(v) + return nil + }) + if err != nil { + return err + } + + if len(names) == 0 { + return nil + } + + logs := deployment.Bucket([]byte(keyTransactions)) + if logs == nil { + // should we return an error instead? + return nil + } + + cursor := logs.Cursor() + + for k, v := cursor.Last(); k != nil; k, v = cursor.Prev() { + var workload gridtypes.Workload + if err := json.Unmarshal(v, &workload); err != nil { + return errors.Wrap(err, "error while scanning transcation logs") + } + + if _, ok := workloads[workload.Name]; ok { + // already loaded and have last state + continue + } + + typ, ok := names[workload.Name] + if !ok { + // not an active workload + continue + } + + if workload.Type != typ { + return fmt.Errorf("database inconsistency wrong workload type") + } + + // otherwise we have a match. 
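			// an UnChanged entry only records a failed update, it is not the
			// latest effective state, so keep walking back for the previous result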
+ if workload.Result.State == gridtypes.StateUnChanged { + continue + } + + workloads[workload.Name] = workload + if len(workloads) == len(names) { + // we all latest states of active workloads + break + } + } + + return nil + }) + if err != nil { + return nil, err + } + + if len(workloads) != len(names) { + return nil, fmt.Errorf("inconsistency in deployment, missing workload transactions") + } + + result := make([]gridtypes.Workload, 0, len(workloads)) + + for _, wl := range workloads { + result = append(result, wl) + } + + return result, err +} + +func (b *BoltStorage) Current(twin uint32, deployment uint64, name gridtypes.Name) (gridtypes.Workload, error) { + var workload gridtypes.Workload + err := b.db.View(func(tx *bolt.Tx) error { + twin := tx.Bucket(b.u32(twin)) + if twin == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "twin not found") + } + deployment := twin.Bucket(b.u64(deployment)) + if deployment == nil { + return errors.Wrap(provision.ErrDeploymentNotExists, "deployment not found") + } + + workloads := deployment.Bucket([]byte(keyWorkloads)) + if workloads == nil { + return errors.Wrap(provision.ErrWorkloadNotExist, "deployment has no active workloads") + } + + // this checks if this workload is an "active" workload. + // if workload is not in this map, then workload might have been + // deleted. + typRaw := workloads.Get([]byte(name)) + if typRaw == nil { + return errors.Wrap(provision.ErrWorkloadNotExist, "workload does not exist") + } + + typ := gridtypes.WorkloadType(typRaw) + + logs := deployment.Bucket([]byte(keyTransactions)) + if logs == nil { + return errors.Wrap(ErrTransactionNotExist, "no transaction logs available") + } + + cursor := logs.Cursor() + + found := false + for k, v := cursor.Last(); k != nil; k, v = cursor.Prev() { + if err := json.Unmarshal(v, &workload); err != nil { + return errors.Wrap(err, "error while scanning transcation logs") + } + + if workload.Name != name { + continue + } + + if workload.Type != typ { + return fmt.Errorf("database inconsistency wrong workload type") + } + + // otherwise we have a match. 
+ if workload.Result.State == gridtypes.StateUnChanged { + continue + } + found = true + break + } + + if !found { + return ErrTransactionNotExist + } + + return nil + }) + + return workload, err +} + +func (b *BoltStorage) Twins() ([]uint32, error) { + var twins []uint32 + err := b.db.View(func(t *bolt.Tx) error { + curser := t.Cursor() + for k, v := curser.First(); k != nil; k, v = curser.Next() { + if v != nil { + // checking that it is a bucket + continue + } + + if len(k) != 4 { + // sanity check it's a valid uint32 + continue + } + + twins = append(twins, b.l32(k)) + } + + return nil + }) + + return twins, err +} + +func (b *BoltStorage) ByTwin(twin uint32) ([]uint64, error) { + var deployments []uint64 + err := b.db.View(func(t *bolt.Tx) error { + bucket := t.Bucket(b.u32(twin)) + if bucket == nil { + return nil + } + + curser := bucket.Cursor() + for k, v := curser.First(); k != nil; k, v = curser.Next() { + if v != nil { + // checking that it is a bucket + continue + } + + if len(k) != 8 || string(k) == "global" { + // sanity check it's a valid uint32 + continue + } + + deployments = append(deployments, b.l64(k)) + } + + return nil + }) + + return deployments, err +} + +func (b *BoltStorage) Capacity(exclude ...provision.Exclude) (storageCap provision.StorageCapacity, err error) { + twins, err := b.Twins() + if err != nil { + return provision.StorageCapacity{}, err + } + + for _, twin := range twins { + dls, err := b.ByTwin(twin) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Msg("failed to get twin deployments") + continue + } + for _, dl := range dls { + deployment, err := b.Get(twin, dl) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Uint64("deployment", dl).Msg("failed to get deployment") + continue + } + + isActive := false + next: + for _, wl := range deployment.Workloads { + if !wl.Result.State.IsOkay() { + continue + } + for _, exc := range exclude { + if exc(&deployment, &wl) { + continue next + } + } + c, err := wl.Capacity() + if err != nil { + return provision.StorageCapacity{}, err + } + + isActive = true + storageCap.Workloads += 1 + storageCap.Cap.Add(&c) + if wl.Result.Created > storageCap.LastDeploymentTimestamp { + storageCap.LastDeploymentTimestamp = wl.Result.Created + } + } + if isActive { + storageCap.Deployments = append(storageCap.Deployments, deployment) + } + } + } + + return storageCap, nil +} + +func (b *BoltStorage) Close() error { + return b.db.Close() +} + +// CleanDeleted is a cleaner method intended to clean up old "deleted" contracts +// that has no active workloads anymore. We used to always leave the entire history +// of all deployments that ever lived on the system. But we changed that so once +// a deployment is deleted, it's deleted forever. Hence this code is only needed +// temporary until it's available on all environments then can be dropped. 
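// A deployment is considered gone once none of its workloads is in an okay
// state; such deployments are removed together with their transaction history.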
+func (b *BoltStorage) CleanDeleted() error { + twins, err := b.Twins() + if err != nil { + return err + } + + for _, twin := range twins { + dls, err := b.ByTwin(twin) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Msg("failed to get twin deployments") + continue + } + for _, dl := range dls { + deployment, err := b.Get(twin, dl) + if err != nil { + log.Error().Err(err).Uint32("twin", twin).Uint64("deployment", dl).Msg("failed to get deployment") + continue + } + + isActive := false + for _, wl := range deployment.Workloads { + if !wl.Result.State.IsOkay() { + continue + } + + isActive = true + break + } + + if isActive { + continue + } + + if err := b.Delete(twin, dl); err != nil { + log.Error().Err(err).Uint32("twin", twin).Uint64("deployment", dl).Msg("failed to delete deployment") + } + } + } + + return nil +} diff --git a/pkg/provision/storage/storage_test.go b/pkg/provision/storage/storage_test.go new file mode 100644 index 00000000..b23a8d74 --- /dev/null +++ b/pkg/provision/storage/storage_test.go @@ -0,0 +1,527 @@ +package storage + +import ( + "encoding/json" + "fmt" + "io" + "math/rand" + "os" + "path/filepath" + "sort" + "testing" + + "github.com/boltdb/bolt" + "github.com/stretchr/testify/require" + "github.com/threefoldtech/zos4/pkg/provision" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +const ( + testType1 = gridtypes.WorkloadType("type1") + testType2 = gridtypes.WorkloadType("type2") + testSharableType1 = gridtypes.WorkloadType("sharable1") +) + +type TestData struct{} + +func (t TestData) Valid(getter gridtypes.WorkloadGetter) error { + return nil +} + +func (t TestData) Challenge(w io.Writer) error { + return nil +} + +func (t TestData) Capacity() (gridtypes.Capacity, error) { + return gridtypes.Capacity{}, nil +} + +func init() { + gridtypes.RegisterType(testType1, TestData{}) + gridtypes.RegisterType(testType2, TestData{}) + gridtypes.RegisterSharableType(testSharableType1, TestData{}) +} + +func TestCreateDeployment(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + err = db.Create(dl) + require.NoError(err) + + err = db.Create(dl) + require.ErrorIs(err, provision.ErrDeploymentExists) +} + +func TestCreateDeploymentWithWorkloads(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: []gridtypes.Workload{ + { + Type: testType1, + Name: "vm1", + }, + { + Type: testType2, + Name: "vm2", + }, + }, + } + + err = db.Create(dl) + require.NoError(err) + + err = db.Create(dl) + require.ErrorIs(err, provision.ErrDeploymentExists) + + loaded, err := db.Get(1, 10) + require.NoError(err) + require.Len(loaded.Workloads, 2) +} + +func TestCreateDeploymentWithSharableWorkloads(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: 
[]gridtypes.Workload{ + { + Type: testType1, + Name: "vm1", + }, + { + Type: testSharableType1, + Name: "network", + }, + }, + } + + err = db.Create(dl) + require.NoError(err) + + dl.ContractID = 11 + err = db.Create(dl) + require.ErrorIs(err, provision.ErrDeploymentConflict) + + require.NoError(db.Remove(1, 10, "networkd")) + err = db.Create(dl) + require.ErrorIs(err, provision.ErrDeploymentConflict) +} + +func TestAddWorkload(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.ErrorIs(err, provision.ErrDeploymentNotExists) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + + err = db.Create(dl) + require.NoError(err) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.NoError(err) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.ErrorIs(err, provision.ErrWorkloadExists) +} + +func TestRemoveWorkload(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + + err = db.Create(dl) + require.NoError(err) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.NoError(err) + + err = db.Remove(1, 10, "vm1") + require.NoError(err) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.NoError(err) +} + +func TestTransactions(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + + err = db.Create(dl) + require.NoError(err) + + _, err = db.Current(1, 10, "vm1") + require.ErrorIs(err, provision.ErrWorkloadNotExist) + + err = db.Add(1, 10, gridtypes.Workload{Name: "vm1", Type: testType1}) + require.NoError(err) + + wl, err := db.Current(1, 10, "vm1") + require.NoError(err) + require.Equal(gridtypes.StateInit, wl.Result.State) + + err = db.Transaction(1, 10, gridtypes.Workload{ + Type: testType1, + Name: gridtypes.Name("wrong"), // wrong name + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateOk, + }, + }) + + require.ErrorIs(err, provision.ErrWorkloadNotExist) + + err = db.Transaction(1, 10, gridtypes.Workload{ + Type: testType2, // wrong type + Name: gridtypes.Name("vm1"), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateOk, + }, + }) + + require.ErrorIs(err, ErrInvalidWorkloadType) + + err = db.Transaction(1, 10, gridtypes.Workload{ + Type: testType1, + Name: gridtypes.Name("vm1"), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateOk, + }, + }) + + require.NoError(err) + + wl, err = db.Current(1, 10, "vm1") + require.NoError(err) + require.Equal(gridtypes.Name("vm1"), wl.Name) + require.Equal(testType1, wl.Type) + require.Equal(gridtypes.StateOk, wl.Result.State) +} + +func TestTwins(t *testing.T) { + require := require.New(t) + path := 
filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + + err = db.Create(dl) + require.NoError(err) + + dl.TwinID = 2 + + err = db.Create(dl) + require.NoError(err) + + twins, err := db.Twins() + require.NoError(err) + + require.Len(twins, 2) + require.EqualValues(1, twins[0]) + require.EqualValues(2, twins[1]) +} + +func TestGet(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + } + + err = db.Create(dl) + require.NoError(err) + + require.NoError(db.Add(dl.TwinID, dl.ContractID, gridtypes.Workload{Name: "vm1", Type: testType1})) + require.NoError(db.Add(dl.TwinID, dl.ContractID, gridtypes.Workload{Name: "vm2", Type: testType2})) + + loaded, err := db.Get(1, 10) + require.NoError(err) + + require.EqualValues(1, loaded.Version) + require.EqualValues(1, loaded.TwinID) + require.EqualValues(10, loaded.ContractID) + require.EqualValues("description", loaded.Description) + require.EqualValues("some metadata", loaded.Metadata) + require.Len(loaded.Workloads, 2) +} + +func TestError(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + someError := fmt.Errorf("something is wrong") + err = db.Error(1, 10, someError) + require.ErrorIs(err, provision.ErrDeploymentNotExists) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: []gridtypes.Workload{ + {Name: "vm1", Type: testType1}, + }, + } + + err = db.Create(dl) + require.NoError(err) + + err = db.Error(1, 10, someError) + require.NoError(err) + + loaded, err := db.Get(1, 10) + require.NoError(err) + require.Equal(gridtypes.StateError, loaded.Workloads[0].Result.State) + require.Equal(someError.Error(), loaded.Workloads[0].Result.Error) +} + +func TestMigrate(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: []gridtypes.Workload{ + { + Name: "vm1", + Type: testType1, + Data: json.RawMessage("null"), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateOk, + Data: json.RawMessage("\"hello\""), + }, + }, + { + Name: "vm2", + Type: testType2, + Data: json.RawMessage("\"input\""), + Result: gridtypes.Result{ + Created: gridtypes.Now(), + State: gridtypes.StateError, + Data: json.RawMessage("null"), + Error: "some error", + }, + }, + }, + } + + migration := db.Migration() + err = migration.Migrate(dl) + require.NoError(err) + + loaded, err := db.Get(1, 10) + sort.Slice(loaded.Workloads, func(i, j int) bool { + return loaded.Workloads[i].Name < loaded.Workloads[j].Name + }) + + require.NoError(err) + require.EqualValues(dl, loaded) +} + +func TestMigrateUnsafe(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), 
fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + migration := db.Migration() + + require.False(db.unsafe) + require.True(migration.unsafe.unsafe) +} + +func TestDeleteDeployment(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: []gridtypes.Workload{ + { + Type: testType1, + Name: "vm1", + }, + { + Type: testType2, + Name: "vm2", + }, + }, + } + + err = db.Create(dl) + require.NoError(err) + + err = db.Delete(1, 10) + require.NoError(err) + + _, err = db.Get(1, 10) + require.ErrorIs(err, provision.ErrDeploymentNotExists) + deployments, err := db.ByTwin(1) + require.NoError(err) + require.Empty(deployments) + + err = db.db.View(func(tx *bolt.Tx) error { + bucket := tx.Bucket(db.u32(1)) + if bucket == nil { + return nil + } + return fmt.Errorf("twin bucket was not deleted") + }) + require.NoError(err) +} + +func TestDeleteDeploymentMultiple(t *testing.T) { + require := require.New(t) + path := filepath.Join(os.TempDir(), fmt.Sprint(rand.Int63())) + defer os.RemoveAll(path) + + db, err := New(path) + require.NoError(err) + + dl := gridtypes.Deployment{ + Version: 1, + TwinID: 1, + ContractID: 10, + Description: "description", + Metadata: "some metadata", + Workloads: []gridtypes.Workload{ + { + Type: testType1, + Name: "vm1", + }, + { + Type: testType2, + Name: "vm2", + }, + }, + } + + err = db.Create(dl) + require.NoError(err) + + dl.ContractID = 20 + err = db.Create(dl) + require.NoError(err) + + err = db.Delete(1, 10) + require.NoError(err) + + _, err = db.Get(1, 10) + require.ErrorIs(err, provision.ErrDeploymentNotExists) + deployments, err := db.ByTwin(1) + require.NoError(err) + require.Len(deployments, 1) + + _, err = db.Get(1, 20) + require.NoError(err) +} diff --git a/pkg/registrar.go b/pkg/registrar.go new file mode 100644 index 00000000..e84e3da8 --- /dev/null +++ b/pkg/registrar.go @@ -0,0 +1,10 @@ +package pkg + +//go:generate mkdir -p stubs + +//go:generate zbusc -module registrar -version 0.0.1 -name registrar -package stubs github.com/threefoldtech/zos4/pkg+Registrar stubs/registrar_stub.go + +type Registrar interface { + NodeID() (uint32, error) + TwinID() (uint32, error) +} diff --git a/pkg/registrar_api_gateway.go b/pkg/registrar_api_gateway.go index e3e1cd1f..0d6a8883 100644 --- a/pkg/registrar_api_gateway.go +++ b/pkg/registrar_api_gateway.go @@ -18,15 +18,15 @@ type RegistrarGateway interface { EnsureAccount(twinID uint64, pk []byte) (twin db.Account, err error) GetContract(id uint64) (substrate.Contract, pkg.SubstrateError) GetContractIDByNameRegistration(name string) (uint64, pkg.SubstrateError) + GetNodeRentContract(node uint32) (uint64, pkg.SubstrateError) GetFarm(id uint64) (db.Farm, error) GetNode(id uint64) (db.Node, error) GetNodeByTwinID(twin uint64) (uint64, error) GetNodeContracts(node uint32) ([]types.U64, error) - GetNodeRentContract(node uint32) (uint64, pkg.SubstrateError) GetNodes(farmID uint32) ([]uint32, error) GetPowerTarget() (power substrate.NodePower, err error) GetTwin(id uint64) (db.Account, error) - GetTwinByPubKey(pk []byte) (uint64, pkg.SubstrateError) + GetTwinByPubKey(pk []byte) (uint64, error) Report(consumptions []substrate.NruConsumption) (types.Hash, error) SetContractConsumption(resources 
...substrate.ContractResources) error SetNodePowerState(up bool) (hash types.Hash, err error) diff --git a/pkg/registrar_gateway/registrar_gateway.go b/pkg/registrar_gateway/registrar_gateway.go index a8eac8d5..2dfa26ae 100644 --- a/pkg/registrar_gateway/registrar_gateway.go +++ b/pkg/registrar_gateway/registrar_gateway.go @@ -174,18 +174,17 @@ func (g *registrarGateway) GetPowerTarget() (power substrate.NodePower, err erro } func (r *registrarGateway) GetTwin(id uint64) (result db.Account, err error) { - url := fmt.Sprintf("%s/v1/accounts/%d", r.baseURL, id) + url := fmt.Sprintf("%s/v1/accounts/", r.baseURL) log.Trace().Str("url", "url").Uint64("id", id).Msg("get account") - return r.getTwin(url) + return r.getTwin(url, id) } -func (g *registrarGateway) GetTwinByPubKey(pk []byte) (result uint64, serr pkg.SubstrateError) { +func (r *registrarGateway) GetTwinByPubKey(pk []byte) (result uint64, err error) { + url := fmt.Sprintf("%s/v1/accounts/", r.baseURL) log.Trace().Str("method", "GetTwinByPubKey").Str("pk", hex.EncodeToString(pk)).Msg("method called") - twinID, err := g.sub.GetTwinByPubKey(pk) - serr = buildSubstrateError(err) - return uint64(twinID), serr + return r.getTwinByPubKey(url, pk) } func (r *registrarGateway) Report(consumptions []substrate.NruConsumption) (types.Hash, error) { @@ -364,23 +363,72 @@ func (g *registrarGateway) ensureAccount(twinID uint64, relay string, pk []byte) return twin, err } -func (g *registrarGateway) getTwin(url string) (result db.Account, err error) { - resp, err := g.httpClient.Get(url) +func (r *registrarGateway) getTwin(url string, twinID uint64) (result db.Account, err error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return + } + + q := req.URL.Query() + q.Add("twin_id", fmt.Sprint(twinID)) + req.URL.RawQuery = q.Encode() + + resp, err := r.httpClient.Do(req) if err != nil { return } + if resp == nil { + return result, errors.New("no response received") + } + if resp.StatusCode == http.StatusNotFound { return result, ErrorRecordNotFound } if resp.StatusCode != http.StatusOK { - return result, fmt.Errorf("failed to get twin with status code %s", resp.Status) + return result, fmt.Errorf("failed to get account by twin id with status code %s", resp.Status) } + defer resp.Body.Close() err = json.NewDecoder(resp.Body).Decode(&result) - return + return result, err +} + +func (r *registrarGateway) getTwinByPubKey(url string, pk []byte) (result uint64, err error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return + } + + q := req.URL.Query() + + publicKeyBase64 := base64.StdEncoding.EncodeToString(pk) + q.Add("public_key", publicKeyBase64) + req.URL.RawQuery = q.Encode() + + resp, err := r.httpClient.Do(req) + if err != nil { + return + } + + if resp == nil { + return result, errors.New("no response received") + } + + if resp.StatusCode == http.StatusNotFound { + return result, ErrorRecordNotFound + } + + if resp.StatusCode != http.StatusOK { + return result, fmt.Errorf("failed to get account by public_key with status code %s", resp.Status) + } + + defer resp.Body.Close() + + err = json.NewDecoder(resp.Body).Decode(&result) + return result, err } func (r *registrarGateway) getZosVersion(url string) (string, error) { diff --git a/pkg/registrar_light/register.go b/pkg/registrar_light/register.go index 50fa746d..ac71465e 100644 --- a/pkg/registrar_light/register.go +++ b/pkg/registrar_light/register.go @@ -15,18 +15,12 @@ import ( "github.com/threefoldtech/zbus" registrargw 
"github.com/threefoldtech/zos4/pkg/registrar_gateway" zos4Stubs "github.com/threefoldtech/zos4/pkg/stubs" - "github.com/threefoldtech/zosbase/pkg" "github.com/threefoldtech/zosbase/pkg/environment" "github.com/threefoldtech/zosbase/pkg/geoip" gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" "github.com/threefoldtech/zosbase/pkg/stubs" ) -const ( - tcUrl = "http://zos.tf/terms/v0.1" - tcHash = "9021d4dee05a661e2cb6838152c67f25" // not this is hash of the url not the document -) - type RegistrationInfo struct { Capacity gridtypes.Capacity Location geoip.Location @@ -108,9 +102,8 @@ func registerNode( info RegistrationInfo, ) (nodeID, twinID uint64, err error) { var ( - mgr = zos4Stubs.NewIdentityManagerStub(cl) - netMgr = stubs.NewNetworkerLightStub(cl) - // substrateGateway = stubs.NewSubstrateGatewayStub(cl) + mgr = zos4Stubs.NewIdentityManagerStub(cl) + netMgr = stubs.NewNetworkerLightStub(cl) registrarGateway = zos4Stubs.NewRegistrarGatewayStub(cl) ) @@ -229,11 +222,12 @@ func registerNode( func ensureTwin(ctx context.Context, registrarGateway *zos4Stubs.RegistrarGatewayStub, sk ed25519.PrivateKey) (uint64, error) { pubKey := sk.Public().(ed25519.PublicKey) - twinID, subErr := registrarGateway.GetTwinByPubKey(ctx, pubKey) - if subErr.IsCode(pkg.CodeNotFound) { - return registrarGateway.CreateTwin(ctx, "", nil) - } else if subErr.IsError() { - return 0, errors.Wrap(subErr.Err, "failed to list twins") + twinID, err := registrarGateway.GetTwinByPubKey(ctx, pubKey) + if err != nil { + if errors.Is(err, registrargw.ErrorRecordNotFound) { + return registrarGateway.CreateTwin(ctx, "", nil) + } + return 0, errors.Wrap(err, "failed to list twins") } return twinID, nil diff --git a/pkg/registrar_light/registrar.go b/pkg/registrar_light/registrar.go index 2f5c2159..4adb938b 100644 --- a/pkg/registrar_light/registrar.go +++ b/pkg/registrar_light/registrar.go @@ -2,6 +2,7 @@ package registrar import ( "context" + "crypto/ed25519" "os" "sync" "time" @@ -10,6 +11,7 @@ import ( "github.com/pkg/errors" "github.com/rs/zerolog/log" "github.com/threefoldtech/zbus" + zos4stubs "github.com/threefoldtech/zos4/pkg/stubs" "github.com/threefoldtech/zosbase/pkg/app" "github.com/threefoldtech/zosbase/pkg/environment" "github.com/threefoldtech/zosbase/pkg/stubs" @@ -144,7 +146,7 @@ func (r *Registrar) register(ctx context.Context, cl zbus.Client, env environmen select { case <-ctx.Done(): case <-time.After(monitorAccountEvery): - if err := r.reActivate(ctx, cl, env); err != nil { + if err := r.reActivate(ctx, cl); err != nil { log.Error().Err(err).Msg("failed to reactivate account") } case <-time.After(updateNodeInfoInterval): @@ -157,10 +159,18 @@ func (r *Registrar) register(ctx context.Context, cl zbus.Client, env environmen } } -func (r *Registrar) reActivate(ctx context.Context, cl zbus.Client, env environment.Environment) error { - substrateGateway := stubs.NewSubstrateGatewayStub(cl) +func (r *Registrar) reActivate(ctx context.Context, cl zbus.Client) error { + registrarGateway := zos4stubs.NewRegistrarGatewayStub(cl) + identityManager := zos4stubs.NewIdentityManagerStub(cl) - _, err := substrateGateway.EnsureAccount(ctx, env.ActivationURL, tcUrl, tcHash) + sk := ed25519.PrivateKey(identityManager.PrivateKey(ctx)) + pubKey := sk.Public().(ed25519.PrivateKey) + + twinID, err := r.TwinID() + if err != nil { + return err + } + _, err = registrarGateway.EnsureAccount(ctx, uint64(twinID), pubKey) return err } diff --git a/pkg/stubs/identity_stub.go b/pkg/stubs/identity_stub.go index 5b72c2f9..64a8fe2a 
100644 --- a/pkg/stubs/identity_stub.go +++ b/pkg/stubs/identity_stub.go @@ -7,6 +7,7 @@ package stubs import ( "context" zbus "github.com/threefoldtech/zbus" + pkg1 "github.com/threefoldtech/zos4/pkg" pkg "github.com/threefoldtech/zosbase/pkg" ) @@ -27,23 +28,6 @@ func NewIdentityManagerStub(client zbus.Client) *IdentityManagerStub { } } -func (s *IdentityManagerStub) Address(ctx context.Context) (ret0 pkg.Address, ret1 error) { - args := []interface{}{} - result, err := s.client.RequestContext(ctx, s.module, s.object, "Address", args...) - if err != nil { - panic(err) - } - result.PanicOnError() - ret1 = result.CallError() - loader := zbus.Loader{ - &ret0, - } - if err := result.Unmarshal(&loader); err != nil { - panic(err) - } - return -} - func (s *IdentityManagerStub) Decrypt(ctx context.Context, arg0 []uint8) (ret0 []uint8, ret1 error) { args := []interface{}{arg0} result, err := s.client.RequestContext(ctx, s.module, s.object, "Decrypt", args...) @@ -129,14 +113,13 @@ func (s *IdentityManagerStub) Farm(ctx context.Context) (ret0 string, ret1 error return } -func (s *IdentityManagerStub) FarmID(ctx context.Context) (ret0 pkg.FarmID, ret1 error) { +func (s *IdentityManagerStub) FarmID(ctx context.Context) (ret0 pkg.FarmID) { args := []interface{}{} result, err := s.client.RequestContext(ctx, s.module, s.object, "FarmID", args...) if err != nil { panic(err) } result.PanicOnError() - ret1 = result.CallError() loader := zbus.Loader{ &ret0, } @@ -146,14 +129,13 @@ func (s *IdentityManagerStub) FarmID(ctx context.Context) (ret0 pkg.FarmID, ret1 return } -func (s *IdentityManagerStub) FarmSecret(ctx context.Context) (ret0 string, ret1 error) { +func (s *IdentityManagerStub) FarmSecret(ctx context.Context) (ret0 string) { args := []interface{}{} result, err := s.client.RequestContext(ctx, s.module, s.object, "FarmSecret", args...) if err != nil { panic(err) } result.PanicOnError() - ret1 = result.CallError() loader := zbus.Loader{ &ret0, } @@ -163,7 +145,7 @@ func (s *IdentityManagerStub) FarmSecret(ctx context.Context) (ret0 string, ret1 return } -func (s *IdentityManagerStub) NodeID(ctx context.Context) (ret0 pkg.StrIdentifier) { +func (s *IdentityManagerStub) NodeID(ctx context.Context) (ret0 pkg1.StrIdentifier) { args := []interface{}{} result, err := s.client.RequestContext(ctx, s.module, s.object, "NodeID", args...) if err != nil { diff --git a/pkg/stubs/provision_stub.go b/pkg/stubs/provision_stub.go new file mode 100644 index 00000000..859094b9 --- /dev/null +++ b/pkg/stubs/provision_stub.go @@ -0,0 +1,161 @@ +// GENERATED CODE +// -------------- +// please do not edit manually instead use the "zbusc" to regenerate + +package stubs + +import ( + "context" + zbus "github.com/threefoldtech/zbus" + gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +type ProvisionStub struct { + client zbus.Client + module string + object zbus.ObjectID +} + +func NewProvisionStub(client zbus.Client) *ProvisionStub { + return &ProvisionStub{ + client: client, + module: "provision", + object: zbus.ObjectID{ + Name: "provision", + Version: "0.0.1", + }, + } +} + +func (s *ProvisionStub) Changes(ctx context.Context, arg0 uint32, arg1 uint64) (ret0 []gridtypes.Workload, ret1 error) { + args := []interface{}{arg0, arg1} + result, err := s.client.RequestContext(ctx, s.module, s.object, "Changes", args...) 
+ if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) CreateOrUpdate(ctx context.Context, arg0 uint32, arg1 gridtypes.Deployment, arg2 bool) (ret0 error) { + args := []interface{}{arg0, arg1, arg2} + result, err := s.client.RequestContext(ctx, s.module, s.object, "CreateOrUpdate", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret0 = result.CallError() + loader := zbus.Loader{} + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) DecommissionCached(ctx context.Context, arg0 string, arg1 string) (ret0 error) { + args := []interface{}{arg0, arg1} + result, err := s.client.RequestContext(ctx, s.module, s.object, "DecommissionCached", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret0 = result.CallError() + loader := zbus.Loader{} + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) Get(ctx context.Context, arg0 uint32, arg1 uint64) (ret0 gridtypes.Deployment, ret1 error) { + args := []interface{}{arg0, arg1} + result, err := s.client.RequestContext(ctx, s.module, s.object, "Get", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) GetWorkloadStatus(ctx context.Context, arg0 string) (ret0 gridtypes.ResultState, ret1 bool, ret2 error) { + args := []interface{}{arg0} + result, err := s.client.RequestContext(ctx, s.module, s.object, "GetWorkloadStatus", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret2 = result.CallError() + loader := zbus.Loader{ + &ret0, + &ret1, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) List(ctx context.Context, arg0 uint32) (ret0 []gridtypes.Deployment, ret1 error) { + args := []interface{}{arg0} + result, err := s.client.RequestContext(ctx, s.module, s.object, "List", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) ListPrivateIPs(ctx context.Context, arg0 uint32, arg1 gridtypes.Name) (ret0 []string, ret1 error) { + args := []interface{}{arg0, arg1} + result, err := s.client.RequestContext(ctx, s.module, s.object, "ListPrivateIPs", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *ProvisionStub) ListPublicIPs(ctx context.Context) (ret0 []string, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "ListPublicIPs", args...) 
+ if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} diff --git a/pkg/stubs/registrar-gateway.go b/pkg/stubs/registrar-gateway.go index cb34c94a..8365f6f3 100644 --- a/pkg/stubs/registrar-gateway.go +++ b/pkg/stubs/registrar-gateway.go @@ -270,16 +270,16 @@ func (s *RegistrarGatewayStub) GetTwin(ctx context.Context, arg0 uint64) (ret0 d return } -func (s *RegistrarGatewayStub) GetTwinByPubKey(ctx context.Context, arg0 []uint8) (ret0 uint64, ret1 pkg.SubstrateError) { +func (s *RegistrarGatewayStub) GetTwinByPubKey(ctx context.Context, arg0 []uint8) (ret0 uint64, ret1 error) { args := []interface{}{arg0} result, err := s.client.RequestContext(ctx, s.module, s.object, "GetTwinByPubKey", args...) if err != nil { panic(err) } result.PanicOnError() + ret1 = result.CallError() loader := zbus.Loader{ &ret0, - &ret1, } if err := result.Unmarshal(&loader); err != nil { panic(err) diff --git a/pkg/stubs/registrar_stub.go b/pkg/stubs/registrar_stub.go new file mode 100644 index 00000000..08d5f59f --- /dev/null +++ b/pkg/stubs/registrar_stub.go @@ -0,0 +1,61 @@ +// GENERATED CODE +// -------------- +// please do not edit manually instead use the "zbusc" to regenerate + +package stubs + +import ( + "context" + zbus "github.com/threefoldtech/zbus" +) + +type RegistrarStub struct { + client zbus.Client + module string + object zbus.ObjectID +} + +func NewRegistrarStub(client zbus.Client) *RegistrarStub { + return &RegistrarStub{ + client: client, + module: "registrar", + object: zbus.ObjectID{ + Name: "registrar", + Version: "0.0.1", + }, + } +} + +func (s *RegistrarStub) NodeID(ctx context.Context) (ret0 uint32, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "NodeID", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *RegistrarStub) TwinID(ctx context.Context) (ret0 uint32, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "TwinID", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} diff --git a/pkg/stubs/statistics_stub.go b/pkg/stubs/statistics_stub.go new file mode 100644 index 00000000..24a30f0d --- /dev/null +++ b/pkg/stubs/statistics_stub.go @@ -0,0 +1,137 @@ +// GENERATED CODE +// -------------- +// please do not edit manually instead use the "zbusc" to regenerate + +package stubs + +import ( + "context" + zbus "github.com/threefoldtech/zbus" + pkg "github.com/threefoldtech/zos4/pkg" + gridtypes "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +type StatisticsStub struct { + client zbus.Client + module string + object zbus.ObjectID +} + +func NewStatisticsStub(client zbus.Client) *StatisticsStub { + return &StatisticsStub{ + client: client, + module: "provision", + object: zbus.ObjectID{ + Name: "statistics", + Version: "0.0.1", + }, + } +} + +func (s *StatisticsStub) Current(ctx context.Context) (ret0 gridtypes.Capacity, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "Current", args...) 
+ if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *StatisticsStub) GetCounters(ctx context.Context) (ret0 pkg.Counters, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "GetCounters", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *StatisticsStub) ListGPUs(ctx context.Context) (ret0 []pkg.GPUInfo, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "ListGPUs", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *StatisticsStub) ReservedStream(ctx context.Context) (<-chan gridtypes.Capacity, error) { + ch := make(chan gridtypes.Capacity, 1) + recv, err := s.client.Stream(ctx, s.module, s.object, "ReservedStream") + if err != nil { + return nil, err + } + go func() { + defer close(ch) + for event := range recv { + var obj gridtypes.Capacity + if err := event.Unmarshal(&obj); err != nil { + panic(err) + } + select { + case <-ctx.Done(): + return + case ch <- obj: + default: + } + } + }() + return ch, nil +} + +func (s *StatisticsStub) Total(ctx context.Context) (ret0 gridtypes.Capacity) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "Total", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +} + +func (s *StatisticsStub) Workloads(ctx context.Context) (ret0 int, ret1 error) { + args := []interface{}{} + result, err := s.client.RequestContext(ctx, s.module, s.object, "Workloads", args...) + if err != nil { + panic(err) + } + result.PanicOnError() + ret1 = result.CallError() + loader := zbus.Loader{ + &ret0, + } + if err := result.Unmarshal(&loader); err != nil { + panic(err) + } + return +}
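Note on the storage code above: Twins and ByTwin tell buckets from values by key length, 4 bytes for a twin bucket and 8 bytes for a deployment key (the inline comment in ByTwin says uint32, but deployment keys are uint64). Those length checks only work with a fixed-width encoding; below is a sketch of the presumed big-endian helpers (u32/l32/u64/l64 live in the part of storage.go not shown in this hunk), offered as an assumption rather than a copy of the implementation.

package example

import "encoding/binary"

// Fixed-width, big-endian key helpers. The storage layout keeps one top-level
// bucket per twin (4-byte key) and one sub-bucket per deployment (8-byte key),
// which is what the length checks in Twins and ByTwin rely on.
func u32(v uint32) []byte {
	k := make([]byte, 4)
	binary.BigEndian.PutUint32(k, v)
	return k
}

func l32(k []byte) uint32 {
	return binary.BigEndian.Uint32(k)
}

func u64(v uint64) []byte {
	k := make([]byte, 8)
	binary.BigEndian.PutUint64(k, v)
	return k
}

func l64(k []byte) uint64 {
	return binary.BigEndian.Uint64(k)
}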
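Capacity walks every deployment of every twin and sums workloads in an okay state unless an exclude predicate matches; the call exc(&deployment, &wl) shows the predicate receives the deployment and the workload. A minimal sketch of using it to leave network workloads out of the count follows; the zos4 import path for the storage package and zos.NetworkType are assumptions based on the packages this patch touches, and the bolt file path is illustrative.

package example

import (
	"fmt"

	"github.com/threefoldtech/zos4/pkg/provision/storage"
	"github.com/threefoldtech/zosbase/pkg/gridtypes"
	"github.com/threefoldtech/zosbase/pkg/gridtypes/zos"
)

// usedCapacityWithoutNetworks opens the provision bolt database at path and
// sums the capacity of all okay workloads except networks.
func usedCapacityWithoutNetworks(path string) error {
	db, err := storage.New(path)
	if err != nil {
		return err
	}
	defer db.Close()

	// The predicate is called once per (deployment, workload) pair; returning
	// true leaves that workload out of the sum.
	skipNetworks := func(dl *gridtypes.Deployment, wl *gridtypes.Workload) bool {
		return wl.Type == zos.NetworkType
	}

	c, err := db.Capacity(skipNetworks)
	if err != nil {
		return err
	}

	fmt.Printf("%d active workloads across %d deployments\n", c.Workloads, len(c.Deployments))
	return nil
}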
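The new pkg/registrar.go interface and its generated stub let other modules resolve the registered node and twin IDs over zbus. A minimal consumer sketch; the redis broker address is the conventional zos one and is an assumption here.

package main

import (
	"context"
	"fmt"

	"github.com/threefoldtech/zbus"
	"github.com/threefoldtech/zos4/pkg/stubs"
)

func main() {
	// Conventional zos message broker socket; adjust to the local setup.
	client, err := zbus.NewRedisClient("unix:///var/run/redis.sock")
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	registrar := stubs.NewRegistrarStub(client)

	// Both calls proxy the Registrar interface from pkg/registrar.go; the
	// returned error is the remote call error carried back over zbus.
	nodeID, err := registrar.NodeID(ctx)
	if err != nil {
		panic(err)
	}
	twinID, err := registrar.TwinID(ctx)
	if err != nil {
		panic(err)
	}

	fmt.Printf("node id: %d, twin id: %d\n", nodeID, twinID)
}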
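The statistics stub's ReservedStream forwards capacity updates on a buffered channel and, per the default case in the generated select, drops events when the reader lags, so it is a change signal rather than a complete history. A small consumer sketch, assuming an already connected zbus client such as the one from the previous example.

package example

import (
	"context"
	"fmt"

	"github.com/threefoldtech/zbus"
	"github.com/threefoldtech/zos4/pkg/stubs"
)

// watchReserved prints reserved-capacity updates until the stream is closed
// or the context is cancelled (the stub closes the channel in both cases).
func watchReserved(ctx context.Context, client zbus.Client) error {
	stats := stubs.NewStatisticsStub(client)

	ch, err := stats.ReservedStream(ctx)
	if err != nil {
		return err
	}

	for c := range ch {
		fmt.Printf("reserved: cru=%d mru=%d sru=%d hru=%d\n", c.CRU, c.MRU, c.SRU, c.HRU)
	}
	return nil
}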