Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-14550 control: Prevent SPDK calls if disable_hugepages true in cfg #13605

Merged
merged 16 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/control/cmd/dmg/pretty/storage.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2020-2023 Intel Corporation.
// (C) Copyright 2020-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand All @@ -26,7 +26,8 @@ func printHostStorageMapVerbose(hsm control.HostStorageMap, out io.Writer, opts
hosts := getPrintHosts(hss.HostSet.RangedString(), opts...)
lineBreak := strings.Repeat("-", len(hosts))
fmt.Fprintf(out, "%s\n%s\n%s\n", lineBreak, hosts, lineBreak)
fmt.Fprintf(out, "HugePage Size: %d KB\n", hss.HostStorage.MemInfo.HugepageSizeKiB)
fmt.Fprintf(out, "HugePage Size: %d KB\n\n",
hss.HostStorage.MemInfo.HugepageSizeKiB)
if len(hss.HostStorage.ScmNamespaces) == 0 {
if err := PrintScmModules(hss.HostStorage.ScmModules, out, opts...); err != nil {
return err
Expand Down
22 changes: 12 additions & 10 deletions src/control/cmd/dmg/pretty/storage_nvme.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2020-2023 Intel Corporation.
// (C) Copyright 2020-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -168,13 +168,14 @@ func parseNvmeFormatResults(inResults storage.NvmeControllers) storage.NvmeContr
parsedResults = append(parsedResults, result)
}
}

return parsedResults
}

func printNvmeFormatResults(ctrlrs storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error {
func printNvmeFormatResults(inCtrlrs storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error {
ctrlrs := parseNvmeFormatResults(inCtrlrs)
iw := txtfmt.NewIndentWriter(out)
if len(ctrlrs) == 0 {
fmt.Fprintln(out, "\tNo NVMe devices found")
fmt.Fprintln(iw, "No NVMe devices were formatted")
return nil
}

Expand All @@ -188,13 +189,13 @@ func printNvmeFormatResults(ctrlrs storage.NvmeControllers, out io.Writer, opts

sort.Slice(ctrlrs, func(i, j int) bool { return ctrlrs[i].PciAddr < ctrlrs[j].PciAddr })

for _, ctrlr := range parseNvmeFormatResults(ctrlrs) {
row := txtfmt.TableRow{pciTitle: ctrlr.PciAddr}
row[resultTitle] = ctrlr.Info
for _, c := range ctrlrs {
row := txtfmt.TableRow{pciTitle: c.PciAddr}
row[resultTitle] = c.Info
roles := "NA"
// Assumes that all SMD devices on a controller have the same roles.
if len(ctrlr.SmdDevices) > 0 {
roles = fmt.Sprintf("%s", ctrlr.SmdDevices[0].Roles.String())
if len(c.SmdDevices) > 0 {
roles = fmt.Sprintf("%s", c.SmdDevices[0].Roles.String())
}
row[rolesTitle] = roles

Expand All @@ -209,8 +210,9 @@ func printNvmeFormatResults(ctrlrs storage.NvmeControllers, out io.Writer, opts
func PrintNvmeControllers(controllers storage.NvmeControllers, out io.Writer, opts ...PrintConfigOption) error {
w := txtfmt.NewErrWriter(out)

iw := txtfmt.NewIndentWriter(out)
if len(controllers) == 0 {
fmt.Fprintln(out, "\tNo NVMe devices found")
fmt.Fprintln(iw, "No NVMe devices found")
return w.Err
}

Expand Down
13 changes: 7 additions & 6 deletions src/control/cmd/dmg/pretty/storage_scm.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2020-2021 Intel Corporation.
// (C) Copyright 2020-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand All @@ -18,8 +18,9 @@ import (
)

func printScmMountPoints(mountpoints storage.ScmMountPoints, out io.Writer, opts ...PrintConfigOption) error {
iw := txtfmt.NewIndentWriter(out)
if len(mountpoints) == 0 {
fmt.Fprintln(out, "\tNo SCM mount results")
fmt.Fprintln(iw, "No SCM mount results")
return nil
}

Expand Down Expand Up @@ -48,9 +49,9 @@ func printScmMountPoints(mountpoints storage.ScmMountPoints, out io.Writer, opts
// TODO: un-export function when not needed in cmd/daos_server/storage.go
func PrintScmModules(modules storage.ScmModules, out io.Writer, opts ...PrintConfigOption) error {
w := txtfmt.NewErrWriter(out)

iw := txtfmt.NewIndentWriter(out)
if len(modules) == 0 {
fmt.Fprintln(out, "\tNo SCM modules found")
fmt.Fprintln(iw, "No SCM modules found")
return w.Err
}

Expand Down Expand Up @@ -89,9 +90,9 @@ func PrintScmModules(modules storage.ScmModules, out io.Writer, opts ...PrintCon
// TODO: un-export function when not needed in cmd/daos_server/storage.go
func PrintScmNamespaces(namespaces storage.ScmNamespaces, out io.Writer, opts ...PrintConfigOption) error {
w := txtfmt.NewErrWriter(out)

iw := txtfmt.NewIndentWriter(out)
if len(namespaces) == 0 {
fmt.Fprintln(out, "\tNo SCM namespaces found")
fmt.Fprintln(iw, "No SCM namespaces found")
return w.Err
}

Expand Down
111 changes: 100 additions & 11 deletions src/control/cmd/dmg/pretty/storage_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2020-2023 Intel Corporation.
// (C) Copyright 2020-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -472,7 +472,8 @@ Errors:
host1
-----
HugePage Size: 2048 KB
No SCM modules found

No SCM modules found

NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
-------- ----- ----------- ------ -------- ------- ----
Expand Down Expand Up @@ -501,11 +502,12 @@ Errors:
host1
-----
HugePage Size: 2048 KB

SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity
---------- ------ ------------ ------- ------------ --------
1 1 1 1 1 954 MiB

No NVMe devices found
No NVMe devices found

`,
},
Expand Down Expand Up @@ -535,9 +537,10 @@ Errors:
host[1-2]
---------
HugePage Size: 2048 KB
No SCM modules found

No NVMe devices found
No SCM modules found

No NVMe devices found

`,
},
Expand All @@ -561,9 +564,10 @@ HugePage Size: 2048 KB
host[1-2]
---------
HugePage Size: 2048 KB
No SCM modules found

No NVMe devices found
No SCM modules found

No NVMe devices found

`,
},
Expand All @@ -583,6 +587,7 @@ HugePage Size: 2048 KB
host1
-----
HugePage Size: 2048 KB

SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity
---------- ------ ------------ ------- ------------ --------
1 1 1 1 1 954 MiB
Expand All @@ -609,6 +614,7 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host1
-----
HugePage Size: 2048 KB

SCM Namespace Socket Capacity
------------- ------ --------
pmem0 0 1.0 TB
Expand Down Expand Up @@ -639,6 +645,7 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host[1-2]
---------
HugePage Size: 2048 KB

SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity
---------- ------ ------------ ------- ------------ --------
1 1 1 1 1 954 MiB
Expand Down Expand Up @@ -669,17 +676,19 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host1
-----
HugePage Size: 2048 KB

SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity
---------- ------ ------------ ------- ------------ --------
1 1 1 1 1 954 MiB

No NVMe devices found
No NVMe devices found

-----
host2
-----
HugePage Size: 2048 KB
No SCM modules found

No SCM modules found

NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
-------- ----- ----------- ------ -------- ------- ----
Expand All @@ -699,6 +708,7 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host[0-1023]
------------
HugePage Size: 2048 KB

SCM Module Socket Memory Ctrlr Channel Channel Slot Capacity
---------- ------ ------------ ------- ------------ --------
1 1 1 1 1 954 MiB
Expand Down Expand Up @@ -737,7 +747,8 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host-[0001-0004]
----------------
HugePage Size: 2048 KB
No SCM modules found

No SCM modules found

NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
-------- ----- ----------- ------ -------- ------- ----
Expand Down Expand Up @@ -773,7 +784,8 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host-j-[0001-0004]
------------------
HugePage Size: 2048 KB
No SCM modules found

No SCM modules found

NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
-------- ----- ----------- ------ -------- ------- ----
Expand Down Expand Up @@ -809,6 +821,7 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host[1,3]
---------
HugePage Size: 2048 KB

SCM Namespace Socket Capacity
------------- ------ --------
pmem0 0 1.0 TB
Expand All @@ -823,6 +836,7 @@ NVMe PCI Model FW Revision Socket Capacity Role(s) Rank
host[2,4]
---------
HugePage Size: 2048 KB

SCM Namespace Socket Capacity
------------- ------ --------
pmem0 0 1.0 TB
Expand Down Expand Up @@ -1058,6 +1072,41 @@ Format Summary:
Hosts SCM Devices NVMe Devices
----- ----------- ------------
host1 2 2
`,
},
"1 SCM, NVMe skipped": {
resp: &control.StorageFormatResp{
HostErrorsResp: control.HostErrorsResp{
HostErrors: make(control.HostErrorsMap),
},
HostStorage: func() control.HostStorageMap {
hsm := make(control.HostStorageMap)
hs := &control.HostStorage{
ScmMountPoints: []*storage.ScmMountPoint{
{
Info: "success",
Path: "/mnt/0",
},
},
NvmeDevices: []*storage.NvmeController{
{
Info: "skipping",
PciAddr: storage.NilBdevAddress,
},
},
}
if err := hsm.Add("host1", hs); err != nil {
t.Fatal(err)
}
return hsm
}(),
},
expPrintStr: `

Format Summary:
Hosts SCM Devices NVMe Devices
----- ----------- ------------
host1 1 0
`,
},
"2 Hosts, 2 SCM, 2 NVMe; first SCM fails": {
Expand Down Expand Up @@ -1278,6 +1327,46 @@ NVMe PCI Format Result Role(s)
1 CTL_SUCCESS data,meta,wal
2 CTL_SUCCESS data,meta,wal

`,
},
"1 SCM, NVMe skipped": {
resp: &control.StorageFormatResp{
HostErrorsResp: control.HostErrorsResp{
HostErrors: make(control.HostErrorsMap),
},
HostStorage: func() control.HostStorageMap {
hsm := make(control.HostStorageMap)
hs := &control.HostStorage{
ScmMountPoints: []*storage.ScmMountPoint{
{
Info: "CTL_SUCCESS",
Path: "/mnt/0",
},
},
NvmeDevices: []*storage.NvmeController{
{
Info: "skipping",
PciAddr: storage.NilBdevAddress,
},
},
}
if err := hsm.Add("host1", hs); err != nil {
t.Fatal(err)
}
return hsm
}(),
},
expPrintStr: `

-----
host1
-----
SCM Mount Format Result
--------- -------------
/mnt/0 CTL_SUCCESS

No NVMe devices were formatted

`,
},
} {
Expand Down
5 changes: 3 additions & 2 deletions src/control/fault/code/codes.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2018-2023 Intel Corporation.
// (C) Copyright 2018-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -152,6 +152,7 @@ const (
ServerIncompatibleComponents
ServerNoCompatibilityInsecure
ServerPoolHasContainers
ServerHugepagesDisabled
)

// server config fault codes
Expand Down Expand Up @@ -182,7 +183,7 @@ const (
ServerConfigFaultCallbackEmpty
ServerConfigFaultDomainTooManyLayers
ServerConfigNrHugepagesOutOfRange
ServerConfigHugepagesDisabled
ServerConfigHugepagesDisabledWithBdevs
ServerConfigVMDSettingDuplicate
ServerConfigEngineNUMAImbalance
ServerConfigControlMetadataNoPath
Expand Down
8 changes: 4 additions & 4 deletions src/control/server/config/faults.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2020-2023 Intel Corporation.
// (C) Copyright 2020-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -91,10 +91,10 @@ var (
"the fault domain path may have a maximum of 2 levels below the root",
"update either the fault domain ('fault_path' parameter) or callback script ('fault_cb' parameter) and restart the control server",
)
FaultConfigHugepagesDisabled = serverConfigFault(
code.ServerConfigHugepagesDisabled,
FaultConfigHugepagesDisabledWithBdevs = serverConfigFault(
code.ServerConfigHugepagesDisabledWithBdevs,
"hugepages cannot be disabled if bdevs have been specified in config",
"remove nr_hugepages parameter from config to have the value automatically calculated",
"either set false (or remove) disable_hugepages parameter or remove nvme storage assignment in config and restart the control server",
)
FaultConfigVMDSettingDuplicate = serverConfigFault(
code.ServerConfigVMDSettingDuplicate,
Expand Down
Loading
Loading