From b804a85969a54163f7a0eab67b0ebf99379c58b1 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 12 Dec 2023 15:03:51 +0000 Subject: [PATCH] add roles detail from config when engines are off-line Features: control Required-githooks: true Signed-off-by: Tom Nabarro --- .../cmd/dmg/pretty/storage_nvme_test.go | 29 ++++ src/control/cmd/dmg/storage.go | 4 +- src/control/server/ctl_storage_rpc.go | 132 ++++++++++++------ src/control/server/ctl_storage_rpc_test.go | 119 ++++++++++++++-- src/control/server/instance_storage_rpc.go | 38 ++--- .../server/instance_storage_rpc_test.go | 24 +++- src/control/server/storage/config.go | 28 ++++ 7 files changed, 293 insertions(+), 81 deletions(-) diff --git a/src/control/cmd/dmg/pretty/storage_nvme_test.go b/src/control/cmd/dmg/pretty/storage_nvme_test.go index aac7cbd17ee7..148206c697ec 100644 --- a/src/control/cmd/dmg/pretty/storage_nvme_test.go +++ b/src/control/cmd/dmg/pretty/storage_nvme_test.go @@ -30,6 +30,11 @@ func TestPretty_PrintNVMeController(t *testing.T) { c.SmdDevices = []*storage.SmdDevice{sd} return c } + ctrlrWithNilRank := func(idx int32) *storage.NvmeController { + c := ctrlrWithSmd(idx, 0) + c.SmdDevices[0].Rank = ranklist.NilRank + return c + } for name, tc := range map[string]struct { devices storage.NvmeControllers expPrintStr string @@ -68,6 +73,30 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank -------- ----- ----------- ------ -------- ------- ---- 0000:01:00.0 model-1 fwRev-1 1 2.0 TB data 1 0000:02:00.0 model-2 fwRev-2 0 2.0 TB meta,wal 2 +`, + }, + "controllers with no roles": { + devices: storage.NvmeControllers{ + ctrlrWithSmd(1, 0), + ctrlrWithSmd(2, 0), + }, + expPrintStr: ` +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB N/A 1 +0000:02:00.0 model-2 fwRev-2 0 2.0 TB N/A 2 +`, + }, + "controllers with no rank": { + devices: storage.NvmeControllers{ + ctrlrWithNilRank(1), + 
ctrlrWithNilRank(2), + }, + expPrintStr: ` +NVMe PCI Model FW Revision Socket Capacity Role(s) Rank +-------- ----- ----------- ------ -------- ------- ---- +0000:01:00.0 model-1 fwRev-1 1 2.0 TB N/A None +0000:02:00.0 model-2 fwRev-2 0 2.0 TB N/A None `, }, } { diff --git a/src/control/cmd/dmg/storage.go b/src/control/cmd/dmg/storage.go index 7b69161a49a9..711407808f5f 100644 --- a/src/control/cmd/dmg/storage.go +++ b/src/control/cmd/dmg/storage.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2019-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -49,7 +49,7 @@ func (cmd *storageScanCmd) Execute(_ []string) error { req := &control.StorageScanReq{ NvmeHealth: cmd.NvmeHealth, - // Don't strip nvme details if verbose or health flags set. + // Strip nvme details if verbose and health flags are unset. NvmeBasic: !(cmd.Verbose || cmd.NvmeHealth), } req.SetHostList(cmd.getHostList()) diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 474c7c5b0c55..52a1f97cdb29 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -19,10 +19,10 @@ import ( "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/proto" "github.com/daos-stack/daos/src/control/common/proto/convert" - "github.com/daos-stack/daos/src/control/common/proto/ctl" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/hardware" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/server/engine" "github.com/daos-stack/daos/src/control/server/storage" @@ -69,8 +69,39 @@ var ( type scanBdevsFn func(storage.BdevScanRequest) (*storage.BdevScanResponse, error) +func ctrlrToPciStr(nc *ctlpb.NvmeController) (string, error) { + pciAddr, 
err := hardware.NewPCIAddress(nc.GetPciAddr()) + if err != nil { + return "", errors.Errorf("Invalid PCI address: %s", err) + } + if pciAddr.IsVMDBackingAddress() { + if pciAddr, err = pciAddr.BackingToVMDAddress(); err != nil { + return "", errors.Errorf("Invalid VMD address: %s", err) + } + } + + return pciAddr.String(), nil +} + +func findBdevTier(pciAddr string, tcs storage.TierConfigs) *storage.TierConfig { + for _, tc := range tcs { + if !tc.IsBdev() { + continue + } + for _, name := range tc.Bdev.DeviceList.Devices() { + if pciAddr == name { + return tc + } + } + } + + return nil +} + // Convert bdev scan results to protobuf response. -func bdevScanToProtoResp(scan scanBdevsFn, req storage.BdevScanRequest) (*ctlpb.ScanNvmeResp, error) { +func bdevScanToProtoResp(scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { + req := storage.BdevScanRequest{DeviceList: bdevCfgs.Bdevs()} + resp, err := scan(req) if err != nil { return nil, err @@ -82,18 +113,35 @@ func bdevScanToProtoResp(scan scanBdevsFn, req storage.BdevScanRequest) (*ctlpb. return nil, err } + if bdevCfgs.HaveBdevs() { + // Update proto Ctrlrs with role info for offline display. + for _, c := range pbCtrlrs { + pciAddrStr, err := ctrlrToPciStr(c) + if err != nil { + return nil, err + } + bc := findBdevTier(pciAddrStr, bdevCfgs) + if bc == nil { + return nil, errors.Errorf("unknown PCI device, scanned ctrlr %q "+ + "not found in cfg", pciAddrStr) + } + if len(c.SmdDevices) != 0 { + return nil, errors.Errorf("scanned ctrlr %q has unexpected smd", + pciAddrStr) + } + c.SmdDevices = append(c.SmdDevices, &ctlpb.SmdDevice{ + RoleBits: uint32(bc.Bdev.DeviceRoles.OptionBits), + Rank: uint32(ranklist.NilRank), + }) + } + } + return &ctlpb.ScanNvmeResp{ State: new(ctlpb.ResponseState), Ctrlrs: pbCtrlrs, }, nil } -// Scan bdevs through harness's ControlService (not per-engine). 
-func bdevScanGlobal(cs *ControlService, cfgBdevs *storage.BdevDeviceList) (*ctlpb.ScanNvmeResp, error) { - req := storage.BdevScanRequest{DeviceList: cfgBdevs} - return bdevScanToProtoResp(cs.storage.ScanBdevs, req) -} - // Scan bdevs through each engine and collate response results. func bdevScanEngines(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace) (*ctlpb.ScanNvmeResp, error) { var errLast error @@ -111,6 +159,8 @@ func bdevScanEngines(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvm eReq.MetaSize, eReq.RdbSize = ms, rs } + // If partial number of engines return results, indicate errors for non-ready + // engines whilst returning successful scan results. respEng, err := scanEngineBdevs(ctx, ei, eReq) if err != nil { err = errors.Wrapf(err, "instance %d", ei.Index()) @@ -140,6 +190,7 @@ func bdevScanTrimResults(req *ctlpb.ScanNvmeReq, resp *ctlpb.ScanNvmeResp) *ctlp pbc.HealthStats = nil } if req.GetBasic() { + pbc.SmdDevices = nil pbc.Serial = "" pbc.Model = "" pbc.FwRev = "" @@ -159,11 +210,15 @@ func engineHasStarted(instances []Engine) bool { return false } -func bdevScanAssigned(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace, hasStarted *bool, cfgBdevs *storage.BdevDeviceList) (*ctlpb.ScanNvmeResp, error) { +func bdevScanAssigned(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace, hasStarted *bool, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { *hasStarted = engineHasStarted(cs.harness.Instances()) if !*hasStarted { cs.log.Debugf("scan bdevs from control service as no engines started") - return bdevScanGlobal(cs, cfgBdevs) + if req.Meta { + return nil, errors.New("meta smd usage info unavailable as engines stopped") + } + + return bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) } // Delegate scan to engine instances as soon as one engine with assigned bdevs has started. 
@@ -184,16 +239,21 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n } }() - cfgBdevs := getBdevCfgsFromSrvCfg(cs.srvCfg).Bdevs() + bdevCfgs := getBdevCfgsFromSrvCfg(cs.srvCfg) + nrCfgBdevs := bdevCfgs.Bdevs().Len() - if cfgBdevs.Len() == 0 { + if nrCfgBdevs == 0 { cs.log.Debugf("scan bdevs from control service as no bdevs in cfg") + if req.Meta { + return nil, errors.New("meta smd usage info unavailable as no bdevs in cfg") + } // No bdevs configured for engines to claim so scan through control service. - resp, err = bdevScanGlobal(cs, cfgBdevs) + resp, err = bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) if err != nil { return nil, err } + return bdevScanTrimResults(req, resp), nil } @@ -202,26 +262,26 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n // been claimed by SPDK but details are not yet available over dRPC. var hasStarted bool - resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, cfgBdevs) + resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, bdevCfgs) if err != nil { return nil, err } // Retry once if global scan returns unexpected number of controllers in case engines // claimed devices between when started state was checked and scan was executed. 
- if !hasStarted && len(resp.Ctrlrs) != cfgBdevs.Len() { + if !hasStarted && len(resp.Ctrlrs) != nrCfgBdevs { cs.log.Debugf("retrying bdev scan as unexpected nr returned, want %d got %d", - cfgBdevs.Len(), len(resp.Ctrlrs)) + nrCfgBdevs, len(resp.Ctrlrs)) - resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, cfgBdevs) + resp, err = bdevScanAssigned(ctx, cs, req, nsps, &hasStarted, bdevCfgs) if err != nil { return nil, err } } - if len(resp.Ctrlrs) != cfgBdevs.Len() { - cs.log.Noticef("bdev scan returned unexpected nr, want %d got %d", - cfgBdevs.Len(), len(resp.Ctrlrs)) + if len(resp.Ctrlrs) != nrCfgBdevs { + cs.log.Noticef("bdev scan returned unexpected nr, want %d got %d", nrCfgBdevs, + len(resp.Ctrlrs)) } return bdevScanTrimResults(req, resp), nil @@ -281,36 +341,24 @@ func (cs *ControlService) scanScm(ctx context.Context, req *ctlpb.ScanScmReq) (* } // Returns the engine configuration managing the given NVMe controller -func (cs *ControlService) getEngineCfgFromNvmeCtl(nc *ctl.NvmeController) (*engine.Config, error) { - pciAddr, err := hardware.NewPCIAddress(nc.GetPciAddr()) +func (cs *ControlService) getEngineCfgFromNvmeCtl(nc *ctlpb.NvmeController) (*engine.Config, error) { + pciAddrStr, err := ctrlrToPciStr(nc) if err != nil { - return nil, errors.Errorf("Invalid PCI address: %s", err) - } - if pciAddr.IsVMDBackingAddress() { - if pciAddr, err = pciAddr.BackingToVMDAddress(); err != nil { - return nil, errors.Errorf("Invalid VMD address: %s", err) - } + return nil, err } - ctlrAddr := pciAddr.String() for index := range cs.srvCfg.Engines { - for _, tierCfg := range cs.srvCfg.Engines[index].Storage.Tiers { - if !tierCfg.IsBdev() { - continue - } - for _, devName := range tierCfg.Bdev.DeviceList.Devices() { - if devName == ctlrAddr { - return cs.srvCfg.Engines[index], nil - } - } + if findBdevTier(pciAddrStr, cs.srvCfg.Engines[index].Storage.Tiers) != nil { + return cs.srvCfg.Engines[index], nil } } - return nil, errors.Errorf("unknown PCI device 
%q", pciAddr) + return nil, errors.Errorf("unknown PCI device, scanned ctrlr %q not found in cfg", + pciAddrStr) } // Returns the engine configuration managing the given SCM name-space -func (cs *ControlService) getEngineCfgFromScmNsp(nsp *ctl.ScmNamespace) (*engine.Config, error) { +func (cs *ControlService) getEngineCfgFromScmNsp(nsp *ctlpb.ScmNamespace) (*engine.Config, error) { mountPoint := nsp.GetMount().Path for index := range cs.srvCfg.Engines { for _, tierCfg := range cs.srvCfg.Engines[index].Storage.Tiers { @@ -383,7 +431,7 @@ func metaRdbComputeSz(cs *ControlService, ei Engine, nsps []*ctlpb.ScmNamespace) } type deviceToAdjust struct { - ctlr *ctl.NvmeController + ctlr *ctlpb.NvmeController idx int rank uint32 } @@ -585,7 +633,7 @@ func (cs *ControlService) adjustScmSize(resp *ctlpb.ScanScmResp) { } mnt.UsableBytes -= mdBytes - removeControlPlaneMetadata := func(m *ctl.ScmNamespace_Mount) { + removeControlPlaneMetadata := func(m *ctlpb.ScmNamespace_Mount) { mountPath := m.GetPath() cs.log.Tracef("Removing control plane metadata (%s, %d bytes) from the usable size of the SCM device %q", diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 71e7e891c557..fb6c700b15e8 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -28,6 +28,7 @@ import ( "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/events" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/provider/system" "github.com/daos-stack/daos/src/control/server/config" @@ -72,15 +73,15 @@ func TestServer_bdevScan(t *testing.T) { "nil request": { expErr: errors.New("nil request"), }, - "no bdevs in config; scan local fails": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; no bdevs in config; 
scan fails": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{{}}, provErr: errors.New("fail"), engStopped: []bool{false}, expErr: errors.New("fail"), }, - "no bdevs in config; scan local; devlist passed to backend": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; no bdevs in config; devlist passed to backend": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{{}}, engStopped: []bool{false}, expResp: &ctlpb.ScanNvmeResp{ @@ -93,7 +94,13 @@ func TestServer_bdevScan(t *testing.T) { {DeviceList: new(storage.BdevDeviceList)}, }, }, - "bdevs in config; engine not started; scan local; devlist passed to backend": { + "scan local; no bdevs in config; meta requested": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{{}}, + engStopped: []bool{true}, + expErr: errors.New("info unavailable"), + }, + "scan local; bdevs in config; meta requested": { req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, engTierCfgs: []storage.TierConfigs{ { @@ -103,6 +110,19 @@ func TestServer_bdevScan(t *testing.T) { test.MockPCIAddr(2)), }, }, + engStopped: []bool{true}, + expErr: errors.New("info unavailable"), + }, + "scan local; bdevs in config; devlist passed to backend; no roles": { + req: &ctlpb.ScanNvmeReq{Health: true}, + engTierCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). 
+ WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), + }, + }, provRes: &storage.BdevScanResponse{ Controllers: storage.NvmeControllers{ storage.MockNvmeController(1), @@ -112,8 +132,26 @@ func TestServer_bdevScan(t *testing.T) { engStopped: []bool{true}, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), - proto.MockNvmeController(2), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: 0, // No roles. + }, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + { + Rank: uint32(ranklist.NilRank), + RoleBits: 0, // No roles. + }, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, @@ -124,8 +162,51 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine not started; scan local; retry on empty response": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; bdevs in config; devlist passed to backend; roles from cfg": { + req: &ctlpb.ScanNvmeReq{Health: true}, + engTierCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). 
+ WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), + }, + }, + provRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{ + storage.MockNvmeController(1), + storage.MockNvmeController(2), + }, + }, + engStopped: []bool{true}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), + }, + State: new(ctlpb.ResponseState), + }, + expBackendScanCalls: []storage.BdevScanRequest{ + { + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(1), test.MockPCIAddr(2)), + }, + }, + }, + "scan local; bdevs in config; devlist passed to backend; retry on empty response": { + req: &ctlpb.ScanNvmeReq{Health: true}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). @@ -153,7 +234,7 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine started; scan remote": { + "scan remote; bdevs in config": { req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, engTierCfgs: []storage.TierConfigs{ { @@ -364,8 +445,8 @@ func TestServer_bdevScan(t *testing.T) { }, }, }, - "bdevs in config; engine not started; scan local; vmd enabled": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan local; bdevs in config; vmd enabled": { + req: &ctlpb.ScanNvmeReq{}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). 
@@ -381,7 +462,15 @@ func TestServer_bdevScan(t *testing.T) { engStopped: []bool{true}, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, + func() *ctlpb.NvmeController { + nc := &ctlpb.NvmeController{ + PciAddr: "050505:01:00.0", + } + nc.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return nc + }(), }, State: new(ctlpb.ResponseState), }, @@ -389,8 +478,8 @@ func TestServer_bdevScan(t *testing.T) { {DeviceList: storage.MustNewBdevDeviceList("0000:05:05.5")}, }, }, - "bdevs in config; engine started; scan remote; vmd enabled": { - req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + "scan remote; bdevs in config; vmd enabled": { + req: &ctlpb.ScanNvmeReq{Meta: true}, engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). diff --git a/src/control/server/instance_storage_rpc.go b/src/control/server/instance_storage_rpc.go index 75b1057f4d91..750ae8f0d8c6 100644 --- a/src/control/server/instance_storage_rpc.go +++ b/src/control/server/instance_storage_rpc.go @@ -272,29 +272,27 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc return &pbResp, nil } -func bdevScanEngineAssigned(ctx context.Context, engine Engine, pbReq *ctlpb.ScanNvmeReq, devList *storage.BdevDeviceList, isStarted *bool) (*ctlpb.ScanNvmeResp, error) { +func bdevScanEngineAssigned(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq, bdevCfgs storage.TierConfigs, isStarted *bool) (*ctlpb.ScanNvmeResp, error) { *isStarted = engine.IsStarted() if !*isStarted { - engine.Debugf("scanning engine-%d bdev tiers while engine is down", engine.Index()) - - // Retrieve engine cfg bdevs to restrict scan scope. 
- req := storage.BdevScanRequest{DeviceList: devList} + engine.Debugf("scanning engine-%d bdevs while engine is down", engine.Index()) + if req.Meta { + return nil, errors.New("meta smd usage info unavailable as engine stopped") + } - return bdevScanToProtoResp(engine.GetStorage().ScanBdevs, req) + return bdevScanToProtoResp(engine.GetStorage().ScanBdevs, bdevCfgs) } - engine.Debugf("scanning engine-%d bdev tiers while engine is up", engine.Index()) + engine.Debugf("scanning engine-%d bdevs while engine is up", engine.Index()) - // If engine is started but not ready, wait for ready state. If partial number of engines - // return results, indicate errors for non-ready engines whilst returning successful scan - // results. + // If engine is started but not ready, wait for ready state. pollFn := func(e Engine) bool { return e.IsReady() } if err := pollInstanceState(ctx, []Engine{engine}, pollFn); err != nil { return nil, errors.Wrapf(err, "waiting for engine %d to be ready to receive drpcs", engine.Index()) } - return scanEngineBdevsOverDrpc(ctx, engine, pbReq) + return scanEngineBdevsOverDrpc(ctx, engine, req) } // bdevScanEngine calls either in to the private engine storage provider to scan bdevs if engine process @@ -304,33 +302,35 @@ func bdevScanEngine(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq) return nil, errors.New("nil request") } - eCfgBdevs := storage.TierConfigs(engine.GetStorage().GetBdevConfigs()).Bdevs() - if eCfgBdevs.Len() == 0 { + bdevCfgs := storage.TierConfigs(engine.GetStorage().GetBdevConfigs()) + nrCfgBdevs := bdevCfgs.Bdevs().Len() + + if nrCfgBdevs == 0 { return nil, errors.Errorf("empty device list for engine instance %d", engine.Index()) } var isStarted bool - resp, err = bdevScanEngineAssigned(ctx, engine, req, eCfgBdevs, &isStarted) + resp, err = bdevScanEngineAssigned(ctx, engine, req, bdevCfgs, &isStarted) if err != nil { return nil, err } // Retry once if engine provider scan returns unexpected number of controllers in 
case // engines claimed devices between when started state was checked and scan was executed. - if !isStarted && len(resp.Ctrlrs) != eCfgBdevs.Len() { + if !isStarted && len(resp.Ctrlrs) != nrCfgBdevs { engine.Debugf("retrying engine bdev scan as unexpected nr returned, want %d got %d", - eCfgBdevs.Len(), len(resp.Ctrlrs)) + nrCfgBdevs, len(resp.Ctrlrs)) - resp, err = bdevScanEngineAssigned(ctx, engine, req, eCfgBdevs, &isStarted) + resp, err = bdevScanEngineAssigned(ctx, engine, req, bdevCfgs, &isStarted) if err != nil { return nil, err } } - if len(resp.Ctrlrs) != eCfgBdevs.Len() { + if len(resp.Ctrlrs) != nrCfgBdevs { engine.Debugf("engine bdev scan returned unexpected nr, want %d got %d", - eCfgBdevs.Len(), len(resp.Ctrlrs)) + nrCfgBdevs, len(resp.Ctrlrs)) } return diff --git a/src/control/server/instance_storage_rpc_test.go b/src/control/server/instance_storage_rpc_test.go index 15a0642829b6..0cda576260a3 100644 --- a/src/control/server/instance_storage_rpc_test.go +++ b/src/control/server/instance_storage_rpc_test.go @@ -68,8 +68,20 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { }, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), - proto.MockNvmeController(2), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(2) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, @@ -85,7 +97,13 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { engStopped: true, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - proto.MockNvmeController(1), + func() *ctlpb.NvmeController { + c := proto.MockNvmeController(1) + c.SmdDevices = []*ctlpb.SmdDevice{ + {Rank: uint32(ranklist.NilRank)}, + } + return c + }(), }, State: new(ctlpb.ResponseState), }, diff 
--git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index 1fe4f6691d6c..c70b62075d8b 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -253,10 +253,18 @@ func (tcs TierConfigs) getBdevs(nvmeOnly bool) *BdevDeviceList { } func (tcs TierConfigs) Bdevs() *BdevDeviceList { + if len(tcs) == 0 { + return new(BdevDeviceList) + } + return tcs.getBdevs(false) } func (tcs TierConfigs) NVMeBdevs() *BdevDeviceList { + if len(tcs) == 0 { + return new(BdevDeviceList) + } + return tcs.getBdevs(true) } @@ -282,18 +290,34 @@ func (tcs TierConfigs) checkBdevs(nvmeOnly, emulOnly bool) bool { } func (tcs TierConfigs) HaveBdevs() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(false, false) } func (tcs TierConfigs) HaveRealNVMe() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(true, false) } func (tcs TierConfigs) HaveEmulatedNVMe() bool { + if len(tcs) == 0 { + return false + } + return tcs.checkBdevs(false, true) } func (tcs TierConfigs) HasBdevRoleMeta() bool { + if len(tcs) == 0 { + return false + } + for _, bc := range tcs.BdevConfigs() { bits := bc.Bdev.DeviceRoles.OptionBits if (bits & BdevRoleMeta) != 0 { @@ -443,6 +467,10 @@ func (tcs TierConfigs) validateBdevRoles() error { // - If the scm tier is of class dcpm, the first (and only) bdev tier should have the Data role. // - If emulated NVMe is present in bdev tiers, implicit role assignment is skipped. func (tcs TierConfigs) AssignBdevTierRoles(extMetadataPath string) error { + if len(tcs) == 0 { + return errors.New("no storage tiers configured") + } + if extMetadataPath == "" { return nil // MD-on-SSD not enabled. }