From 095f26c0abdb4ba6537cfac0b7e3363c90f8d892 Mon Sep 17 00:00:00 2001 From: Jay Pipes Date: Sat, 30 Dec 2023 12:27:40 -0500 Subject: [PATCH] begin work of supporting current resource usage This patch begins the process of supporting current resource usage information in `ghw`. The first resource we support is memory on Linux systems. Users can use the `ghw.WithCollectUsage(true)` function to instruct `ghw` to gather usage information for the module in question. Alternately, users can use the `GHW_COLLECT_USAGE=1` environment variable. The following shows the effect of the variable: ``` jaypipes@lappie:~/src/github.com/jaypipes/ghw$ go run cmd/ghwc/main.go memory memory (16GB physical, 16GB usable) jaypipes@lappie:~/src/github.com/jaypipes/ghw$ GHW_COLLECT_USAGE=1 go run cmd/ghwc/main.go memory memory (16GB physical, 16GB usable, 9GB used) ``` Issue #357 Signed-off-by: Jay Pipes --- .github/workflows/fmtcheck.yml | 6 +- .github/workflows/lint.yml | 3 +- .github/workflows/test.yml | 26 +++- README.md | 233 +++++++++++++++++++-------------- go.mod | 2 +- pkg/context/context.go | 5 + pkg/memory/memory.go | 18 ++- pkg/memory/memory_linux.go | 176 +++++++++++++++++-------- pkg/option/option.go | 26 ++++ 9 files changed, 336 insertions(+), 159 deletions(-) diff --git a/.github/workflows/fmtcheck.yml b/.github/workflows/fmtcheck.yml index 623863a9..f048fcdc 100644 --- a/.github/workflows/fmtcheck.yml +++ b/.github/workflows/fmtcheck.yml @@ -20,11 +20,15 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 # v4.0.1 with: - go-version: 1.19 + go-version: 1.21 - name: check fmt run: 'bash -c "diff -u <(echo -n) <(gofmt -d .)"' diff --git a/.github/workflows/lint.yml 
b/.github/workflows/lint.yml index e8488e9a..1162cb71 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,12 +24,13 @@ jobs: api.github.com:443 raw.githubusercontent.com:443 objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go uses: actions/setup-go@fac708d6674e30b6ba41289acaab6d4b75aa0753 # v4.0.1 with: - go-version: 1.19 + go-version: 1.21 - name: lint uses: golangci/golangci-lint-action@639cd343e1d3b897ff35927a75193d57cfcba299 # v3.6.0 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 977703ea..46c152a5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: [ '1.19', '1.20'] + go: [ '1.20', '1.21'] steps: - name: harden runner uses: step-security/harden-runner@55d479fb1c5bcad5a4f9099a5d9f37c8857b2845 # v2.4.1 @@ -26,6 +26,10 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go @@ -51,6 +55,10 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go @@ -67,7 +75,7 @@ jobs: runs-on: windows-2022 strategy: matrix: - go: [ '1.19' ] + go: [ '1.20', '1.21' ] steps: - name: harden runner uses: step-security/harden-runner@55d479fb1c5bcad5a4f9099a5d9f37c8857b2845 # v2.4.1 @@ -76,6 +84,10 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + 
proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go @@ -98,7 +110,7 @@ jobs: runs-on: windows-2019 strategy: matrix: - go: [ '1.18' ] + go: [ '1.18', '1.19' ] steps: - name: harden runner uses: step-security/harden-runner@55d479fb1c5bcad5a4f9099a5d9f37c8857b2845 # v2.4.1 @@ -107,6 +119,10 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go @@ -145,6 +161,10 @@ jobs: disable-sudo: true allowed-endpoints: > github.com:443 + api.github.com:443 + raw.githubusercontent.com:443 + objects.githubusercontent.com:443 + proxy.golang.org:443 - name: checkout code uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - name: setup go diff --git a/README.md b/README.md index c1cdb106..7613d0aa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# `ghw` - Go HardWare discovery/inspection library +# `ghw` - Go HardWare library [![Go Reference](https://pkg.go.dev/badge/github.com/jaypipes/ghw.svg)](https://pkg.go.dev/github.com/jaypipes/ghw) [![Go Report Card](https://goreportcard.com/badge/github.com/jaypipes/ghw)](https://goreportcard.com/report/github.com/jaypipes/ghw) @@ -10,6 +10,10 @@ `ghw` is a Go library providing hardware inspection and discovery for Linux and Windows. There currently exists partial support for MacOSX. +`ghw` gathers information about your hardware's **capacity** and +**capabilities**. It can also be used to query for your hardware's **current +resource usage**. + ## Design Principles * No root privileges needed for discovery @@ -34,18 +38,6 @@ Windows. There currently exists partial support for MacOSX. the structs returned by various library functions should have consistent attribute and method names. 
-## Inspecting != Monitoring - -`ghw` is a tool for gathering information about your hardware's **capacity** -and **capabilities**. - -It is important to point out that `ghw` does **NOT** report information that is -temporary or variable. It is **NOT** a system monitor nor is it an appropriate -tool for gathering data points for metrics that change over time. If you are -looking for a system that tracks **usage** of CPU, memory, network I/O or disk -I/O, there are plenty of great open source tools that do this! Check out the -[Prometheus project](https://prometheus.io/) for a great example. - ## Usage `ghw` has functions that return an `Info` object about a particular hardware @@ -67,7 +59,18 @@ hardware: * [`ghw.Baseboard()`](#baseboard) * [`ghw.Product()`](#product) -### CPU +Each top-level function has the same signature. The top-level functions accept +zero or more `ghw.WithOption` structs and return a pointer to a `ghw.XXXInfo` +struct. + +A `ghw.XXXInfo` struct corresponds to the name of the module being queried. For +example, the `ghw.CPU` module's `ghw.XXXInfo` struct is `ghw.CPUInfo`. + +`ghw.WithOption` structs are returned from +[`ghw` functions prefixed with `With`](#with-functions), for example +`ghw.WithCollectUsage()` or `ghw.WithChroot()`. + +## CPU The `ghw.CPU()` function returns a `ghw.CPUInfo` struct that contains information about the CPUs on the host system. @@ -180,7 +183,7 @@ cpu (1 physical package, 6 cores, 12 hardware threads) flexpriority ept vpid dtherm ida arat] ``` -### Memory +## Memory The `ghw.Memory()` function returns a `ghw.MemoryInfo` struct that contains information about the RAM on the host system. @@ -194,6 +197,11 @@ information about the RAM on the host system. resident memory size and some reserved system bits. Please note this value is **NOT** the amount of memory currently in use by processes in the system. See [the discussion][#physical-versus-usage-memory] about the difference. 
+* `ghw.MemoryInfo.TotalUsedBytes` contains the amount of memory the system is + currently using. If `GHW_COLLECT_USAGE` is not true/on, this value will be + `-1`. On Linux, this value is calculated by subtracting the sum of free, + buffered, cached and slab reclaimable memory from the total amount of usable + memory. * `ghw.MemoryInfo.SupportedPageSizes` is an array of integers representing the size, in bytes, of memory pages the system supports * `ghw.MemoryInfo.Modules` is an array of pointers to `ghw.MemoryModule` @@ -211,7 +219,7 @@ import ( ) func main() { - memory, err := ghw.Memory() + memory, err := ghw.Memory(ghw.WithCollectUsage(true)) if err != nil { fmt.Printf("Error getting memory info: %v", err) } @@ -223,10 +231,10 @@ func main() { Example output from my personal workstation: ``` -memory (24GB physical, 24GB usable) +memory (24GB physical, 24GB usable, 9GB used) ``` -#### Physical versus Usable Memory +### Physical versus Usable Memory There has been [some](https://github.com/jaypipes/ghw/pull/171) [confusion](https://github.com/jaypipes/ghw/issues/183) regarding the @@ -281,7 +289,7 @@ system with a Linux GRUB bootloader: The bootloader consumes 3832720 bytes of RAM ``` -### Block storage +## Block storage The `ghw.Block()` function returns a `ghw.BlockInfo` struct that contains information about the block storage on the host system. @@ -394,7 +402,7 @@ block storage (1 disk, 2TB physical storage) > `/run` into your container, otherwise `ghw` won't be able to query the udev > DB or sysfs paths for information. -### Topology +## Topology > **NOTE**: Topology support is currently Linux-only. Windows support is > [planned](https://github.com/jaypipes/ghw/issues/166). @@ -511,7 +519,7 @@ topology SMP (1 nodes) L3 cache (12288 KB) shared with logical processors: 0,1,10,11,2,3,4,5,6,7,8,9 ``` -### Network +## Network The `ghw.Network()` function returns a `ghw.NetworkInfo` struct that contains information about the host computer's networking hardware. 
@@ -649,7 +657,7 @@ net (3 NICs) - netns-local ``` -### PCI +## PCI `ghw` contains a PCI database inspection and querying facility that allows developers to not only gather information about devices on a local PCI bus but @@ -814,7 +822,7 @@ host PCI devices: 0000:3f:06.3 Intel Corporation Xeon 5600 Series Integrated Memory Co... ``` -#### Finding a PCI device by PCI address +### Finding a PCI device by PCI address In addition to the above information, the `ghw.PCIInfo` struct has the following method: @@ -885,7 +893,7 @@ Subclass: VGA compatible controller [00] Programming Interface: VGA controller [00] ``` -### GPU +## GPU The `ghw.GPU()` function returns a `ghw.GPUInfo` struct that contains information about the host computer's graphics hardware. @@ -945,7 +953,7 @@ information `ghw.TopologyNode` struct if you'd like to dig deeper into the NUMA/topology subsystem -### Chassis +## Chassis The `ghw.Chassis()` function returns a `ghw.ChassisInfo` struct that contains information about the host computer's hardware chassis. @@ -1000,7 +1008,7 @@ WARNING: Unable to read chassis_serial: open /sys/class/dmi/id/chassis_serial: p You can ignore them or use the [Disabling warning messages](#disabling-warning-messages) feature to quiet things down. -### BIOS +## BIOS The `ghw.BIOS()` function returns a `ghw.BIOSInfo` struct that contains information about the host computer's basis input/output system (BIOS). @@ -1036,7 +1044,7 @@ Example output from my personal workstation: bios vendor=System76 version=F2 Z5 date=11/14/2018 ``` -### Baseboard +## Baseboard The `ghw.Baseboard()` function returns a `ghw.BaseboardInfo` struct that contains information about the host computer's hardware baseboard. @@ -1090,7 +1098,7 @@ WARNING: Unable to read board_serial: open /sys/class/dmi/id/board_serial: permi You can ignore them or use the [Disabling warning messages](#disabling-warning-messages) feature to quiet things down. 
-### Product +## Product The `ghw.Product()` function returns a `ghw.ProductInfo` struct that contains information about the host computer's hardware product line. @@ -1146,7 +1154,82 @@ WARNING: Unable to read product_serial: open /sys/class/dmi/id/product_serial: p You can ignore them or use the [Disabling warning messages](#disabling-warning-messages) feature to quiet things down. -## Advanced Usage +## Serialization to JSON or YAML + +All of the `ghw` `XXXInfo` structs -- e.g. `ghw.CPUInfo` -- have two methods +for producing a serialized JSON or YAML string representation of the contained +information: + +* `JSONString()` returns a string containing the information serialized into + JSON. It accepts a single boolean parameter indicating whether to use + indentation when outputting the string +* `YAMLString()` returns a string containing the information serialized into + YAML + +```go +package main + +import ( + "fmt" + + "github.com/jaypipes/ghw" +) + +func main() { + mem, err := ghw.Memory() + if err != nil { + fmt.Printf("Error getting memory info: %v", err) + } + + fmt.Printf("%s", mem.YAMLString()) +} +``` + +the above example code prints the following out on my local workstation: + +``` +memory: + supported_page_sizes: + - 1073741824 + - 2097152 + total_physical_bytes: 25263415296 + total_usable_bytes: 25263415296 +``` + +## With functions + +`ghw`'s With functions allow you to modify `ghw`'s behaviour when discovering +hardware capabilities or resource usage. + +* `ghw.WithCollectUsage()` tells `ghw` to collect current resource usage when + collecting hardware information. By default, `ghw` does **NOT** collect + current resource usage. +* `ghw.WithDisableWarnings()` tells `ghw` not to print warning information to + `stderr` when it is unable to determine certain information. +* `ghw.WithChroot()` tells `ghw` to use an alternate root mountpoint. +* `ghw.WithPathOverrides()` tells `ghw` to use one or more alternate + mountpoints (Linux only). 
+* `ghw.WithDisableTools()` tells `ghw` not to use certain external tools (like + `ethtool` on Linux). +* `ghw.WithSnapshot()` tells `ghw` to read a `ghw-snapshot` file instead of + examining the host system. + +### Collecting current resource usage + +By default, `ghw` does **NOT** collect resource usage. To instruct `ghw` to +collect current resource usage, set the `GHW_COLLECT_USAGE` environment +variable. + +You can enable resource usage collection programmatically using the +`ghw.WithCollectUsage()` option: + +```go +import ( + "github.com/jaypipes/ghw" +) + +mem, err := ghw.Memory(ghw.WithCollectUsage(true)) +``` ### Disabling warning messages @@ -1170,10 +1253,10 @@ $ GHW_DISABLE_WARNINGS=1 ghwc memory memory (24GB physical, 24GB usable) ``` -You can disable warning programmatically using the `WithDisableWarnings` option: +You can disable warnings programmatically using the `ghw.WithDisableWarnings()` +option: ```go - import ( "github.com/jaypipes/ghw" ) @@ -1212,48 +1295,6 @@ Alternately, you can use the `ghw.WithChroot()` function like so: cpu, err := ghw.CPU(ghw.WithChroot("/host")) ``` -### Serialization to JSON or YAML - -All of the `ghw` `XXXInfo` structs -- e.g. `ghw.CPUInfo` -- have two methods -for producing a serialized JSON or YAML string representation of the contained -information: - -* `JSONString()` returns a string containing the information serialized into - JSON. 
It accepts a single boolean parameter indicating whether to use - indentation when outputting the string -* `YAMLString()` returns a string containing the information serialized into - YAML - -```go -package main - -import ( - "fmt" - - "github.com/jaypipes/ghw" -) - -func main() { - mem, err := ghw.Memory() - if err != nil { - fmt.Printf("Error getting memory info: %v", err) - } - - fmt.Printf("%s", mem.YAMLString()) -} -``` - -the above example code prints the following out on my local workstation: - -``` -memory: - supported_page_sizes: - - 1073741824 - - 2097152 - total_physical_bytes: 25263415296 - total_usable_bytes: 25263415296 -``` - ### Overriding a specific mountpoint (Linux only) When running inside containers, it can be cumbersome to only override the root @@ -1275,6 +1316,28 @@ cpu, err := ghw.CPU(ghw.WithPathOverrides(ghw.PathOverrides{ **NOTE**: This feature works in addition and is composable with the `ghw.WithChroot()` function and `GHW_CHROOT` environment variable. +## Calling external programs + +By default `ghw` may call external programs, for example `ethtool`, to learn +about hardware capabilities. In some rare circumstances it may be useful to +opt out from this behaviour and rely only on the data provided by +pseudo-filesystems, like sysfs. + +The most common use case is when we want to read a snapshot from `ghw`. In +these cases the information provided by tools will be inconsistent with the +data from the snapshot - since they will be run on a different host than the +host the snapshot was created for. + +To prevent `ghw` from calling external tools, set the `GHW_DISABLE_TOOLS` +environment variable to any value, or, programmatically, use the +`ghw.WithDisableTools()` function. The default behaviour of ghw is to call +external tools when available. + +> **WARNING**: on all platforms, disabling external tools make ghw return less +> data. Unless noted otherwise, there is _no fallback flow_ if external tools +> are disabled. 
On MacOSX/Darwin, disabling external tools disables block +> support entirely + ### Reading hardware information from a `ghw` snapshot (Linux only) The `ghw-snapshot` tool can create a snapshot of a host's hardware information. @@ -1327,7 +1390,7 @@ cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ })) ``` -### Creating snapshots +## Creating snapshots You can create `ghw` snapshots using the `ghw-snapshot` tool or programmatically using the `pkg/snapshot` package. @@ -1377,28 +1440,6 @@ if err := snapshot.PackFrom("my-snapshot.tgz", scratchDir); err != nil { } ``` -## Calling external programs - -By default `ghw` may call external programs, for example `ethtool`, to learn -about hardware capabilities. In some rare circumstances it may be useful to -opt out from this behaviour and rely only on the data provided by -pseudo-filesystems, like sysfs. - -The most common use case is when we want to read a snapshot from `ghw`. In -these cases the information provided by tools will be inconsistent with the -data from the snapshot - since they will be run on a different host than the -host the snapshot was created for. - -To prevent `ghw` from calling external tools, set the `GHW_DISABLE_TOOLS` -environment variable to any value, or, programmatically, use the -`ghw.WithDisableTools()` function. The default behaviour of ghw is to call -external tools when available. - -> **WARNING**: on all platforms, disabling external tools make ghw return less -> data. Unless noted otherwise, there is _no fallback flow_ if external tools -> are disabled. On MacOSX/Darwin, disabling external tools disables block -> support entirely - ## Developers [Contributions](CONTRIBUTING.md) to `ghw` are welcomed! 
Fork the repo on GitHub diff --git a/go.mod b/go.mod index f315f115..7fa6d86c 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/jaypipes/ghw -go 1.19 +go 1.21 require ( github.com/StackExchange/wmi v1.2.1 diff --git a/pkg/context/context.go b/pkg/context/context.go index fb8de528..02efc151 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -17,6 +17,7 @@ import ( // execution context when calling internal discovery methods type Context struct { Chroot string + CollectUsage bool EnableTools bool SnapshotPath string SnapshotRoot string @@ -75,6 +76,10 @@ func New(opts ...*option.Option) *Context { ctx.alert = merged.Alerter } + if merged.CollectUsage != nil { + ctx.CollectUsage = *merged.CollectUsage + } + if merged.EnableTools != nil { ctx.EnableTools = *merged.EnableTools } diff --git a/pkg/memory/memory.go b/pkg/memory/memory.go index 81e0dc66..54fde648 100644 --- a/pkg/memory/memory.go +++ b/pkg/memory/memory.go @@ -36,6 +36,7 @@ type Module struct { type Area struct { TotalPhysicalBytes int64 `json:"total_physical_bytes"` TotalUsableBytes int64 `json:"total_usable_bytes"` + TotalUsedBytes int64 `json:"total_used_bytes"` // An array of sizes, in bytes, of memory pages supported in this area SupportedPageSizes []uint64 `json:"supported_page_sizes"` Modules []*Module `json:"modules"` @@ -44,21 +45,28 @@ type Area struct { // String returns a short string with a summary of information for this memory // area func (a *Area) String() string { - tpbs := util.UNKNOWN + physs := util.UNKNOWN if a.TotalPhysicalBytes > 0 { tpb := a.TotalPhysicalBytes unit, unitStr := unitutil.AmountString(tpb) tpb = int64(math.Ceil(float64(a.TotalPhysicalBytes) / float64(unit))) - tpbs = fmt.Sprintf("%d%s", tpb, unitStr) + physs = fmt.Sprintf("%d%s", tpb, unitStr) } - tubs := util.UNKNOWN + usables := util.UNKNOWN if a.TotalUsableBytes > 0 { tub := a.TotalUsableBytes unit, unitStr := unitutil.AmountString(tub) tub = 
int64(math.Ceil(float64(a.TotalUsableBytes) / float64(unit))) - tubs = fmt.Sprintf("%d%s", tub, unitStr) + usables = fmt.Sprintf("%d%s", tub, unitStr) } - return fmt.Sprintf("memory (%s physical, %s usable)", tpbs, tubs) + useds := "" + if a.TotalUsedBytes > 0 { + tub := a.TotalUsedBytes + unit, unitStr := unitutil.AmountString(tub) + tub = int64(math.Ceil(float64(a.TotalUsedBytes) / float64(unit))) + useds = fmt.Sprintf(", %d%s used", tub, unitStr) + } + return fmt.Sprintf("memory (%s physical, %s usable%s)", physs, usables, useds) } // Info contains information about the memory on a host system. diff --git a/pkg/memory/memory_linux.go b/pkg/memory/memory_linux.go index d5a54101..12e9ac46 100644 --- a/pkg/memory/memory_linux.go +++ b/pkg/memory/memory_linux.go @@ -45,16 +45,29 @@ var ( func (i *Info) load() error { paths := linuxpath.New(i.ctx) - tub := memTotalUsableBytes(paths) - if tub < 1 { + mi := memInfo{} + if err := mi.load(paths.ProcMeminfo); err != nil { + return err + } + usable := mi.totalUsableBytes() + if usable < 1 { return fmt.Errorf("Could not determine total usable bytes of memory") } - i.TotalUsableBytes = tub + i.TotalUsableBytes = usable + if i.ctx.CollectUsage { + used := mi.totalUsedBytes() + if used < 1 { + return fmt.Errorf("Could not determine total used bytes of memory") + } + i.TotalUsedBytes = used + } else { + i.TotalUsedBytes = -1 + } tpb := memTotalPhysicalBytes(paths) i.TotalPhysicalBytes = tpb if tpb < 1 { i.ctx.Warn(warnCannotDeterminePhysicalMemory) - i.TotalPhysicalBytes = tub + i.TotalPhysicalBytes = usable } i.SupportedPageSizes, _ = memorySupportedPageSizes(paths.SysKernelMMHugepages) return nil @@ -72,10 +85,11 @@ func AreaForNode(ctx *context.Context, nodeID int) (*Area, error) { var totPhys int64 var totUsable int64 - totUsable, err = memoryTotalUsableBytesFromPath(filepath.Join(path, "meminfo")) - if err != nil { + mi := memInfo{} + if err := mi.load(filepath.Join(path, "meminfo")); err != nil { return nil, err } + 
totUsable = mi.totalUsableBytes() blockSizeBytes, err = memoryBlockSizeBytes(paths.SysDevicesSystemMemory) if err == nil { @@ -235,66 +249,124 @@ func memTotalPhysicalBytesFromSyslog(paths *linuxpath.Paths) int64 { return -1 } -func memTotalUsableBytes(paths *linuxpath.Paths) int64 { - amount, err := memoryTotalUsableBytesFromPath(paths.ProcMeminfo) - if err != nil { - return -1 - } - return amount -} +// memInfo is a /proc/meminfo file parsed into its key:value blocks, with all +// values ending in a "kB" suffix having their values multiplied by 1024. +type memInfo map[string]int64 -func memoryTotalUsableBytesFromPath(meminfoPath string) (int64, error) { - // In Linux, /proc/meminfo or its close relative - // /sys/devices/system/node/node*/meminfo - // contains a set of memory-related amounts, with - // lines looking like the following: - // - // $ cat /proc/meminfo - // MemTotal: 24677596 kB - // MemFree: 21244356 kB - // MemAvailable: 22085432 kB - // ... - // HugePages_Total: 0 - // HugePages_Free: 0 - // HugePages_Rsvd: 0 - // HugePages_Surp: 0 - // ... - // - // It's worth noting that /proc/meminfo returns exact information, not - // "theoretical" information. For instance, on the above system, I have - // 24GB of RAM but MemTotal is indicating only around 23GB. This is because - // MemTotal contains the exact amount of *usable* memory after accounting - // for the kernel's resident memory size and a few reserved bits. - // Please note GHW cares about the subset of lines shared between system-wide - // and per-NUMA-node meminfos. For more information, see: - // - // https://www.kernel.org/doc/Documentation/filesystems/proc.txt - r, err := os.Open(meminfoPath) +// load accepts a path and loads the memInfo map by parsing the supplied +// /proc/meminfo file. 
+// +// In Linux, /proc/meminfo or its close relative +// /sys/devices/system/node/node*/meminfo contains a set of memory-related +// amounts, with lines looking like the following: +// +// $ cat /proc/meminfo +// MemTotal: 24677596 kB +// MemFree: 21244356 kB +// MemAvailable: 22085432 kB +// ... +// HugePages_Total: 0 +// HugePages_Free: 0 +// HugePages_Rsvd: 0 +// HugePages_Surp: 0 +// ... +// +// The /sys/devices/system/node/node*/meminfo files look like this, however: +// +// Node 0 MemTotal: 24677596 kB +// Node 0 MemFree: 21244356 kB +// Node 0 MemAvailable: 22085432 kB +// ... +// Node 0 HugePages_Total: 0 +// Node 0 HugePages_Free: 0 +// Node 0 HugePages_Rsvd: 0 +// Node 0 HugePages_Surp: 0 +// ... +// +// It's worth noting that /proc/meminfo returns exact information, not +// "theoretical" information. For instance, on the above system, I have 24GB of +// RAM but MemTotal is indicating only around 23GB. This is because MemTotal +// contains the exact amount of *usable* memory after accounting for the +// kernel's resident memory size and a few reserved bits. Please note GHW +// cares about the subset of lines shared between system-wide and per-NUMA-node +// meminfos. For more information, see: +// +// https://www.kernel.org/doc/Documentation/filesystems/proc.txt +func (mi memInfo) load(fp string) error { + r, err := os.Open(fp) if err != nil { - return -1, err + return err } defer util.SafeClose(r) scanner := bufio.NewScanner(r) for scanner.Scan() { line := scanner.Text() - parts := strings.Split(line, ":") - key := parts[0] - if !strings.Contains(key, "MemTotal") { - continue + if strings.HasPrefix(line, "Node") { + // For the /sys/devices/system/node/nodeX/meminfo files, the lines + // all start with "Node X ". We need to strip all that off. 
+ fields := strings.Fields(line) + line = strings.Join(fields[2:], "") } - rawValue := parts[1] - inKb := strings.HasSuffix(rawValue, "kB") - value, err := strconv.Atoi(strings.TrimSpace(strings.TrimSuffix(rawValue, "kB"))) + parts := strings.Split(line, ":") + key := strings.TrimSpace(parts[0]) + raw := parts[1] + inKb := strings.HasSuffix(raw, "kB") + v, err := strconv.Atoi( + strings.TrimSpace( + strings.TrimSuffix( + raw, "kB", + ), + ), + ) + v64 := int64(v) if err != nil { - return -1, err + return err } if inKb { - value = value * int(unitutil.KB) + v64 = v64 * unitutil.KB } - return int64(value), nil + mi[key] = v64 + } + return nil +} + +// totalUsableBytes returns the MemTotal entry from the memInfo map +func (mi memInfo) totalUsableBytes() int64 { + v, ok := mi["MemTotal"] + if !ok { + return -1 + } + return v +} + +// totalUsedBytes returns the total used memory from the memInfo map. +// We calculate used memory with the following formula: +// mem_total - (mem_free + mem_buffered + mem_cached + mem_slab_reclaimable) +func (mi memInfo) totalUsedBytes() int64 { + mf, ok := mi["MemFree"] + if !ok { + return -1 + } + mc, ok := mi["Cached"] + if !ok { + return -1 + } + mb, ok := mi["Buffers"] + if !ok { + return -1 } - return -1, fmt.Errorf("failed to find MemTotal entry in path %q", meminfoPath) + mt, ok := mi["MemTotal"] + if !ok { + return -1 + } + if sr, ok := mi["SReclaimable"]; ok { + return mt - (mf + mb + mc + sr) + } else if st, ok := mi["Slab"]; ok { + // If detailed slab information isn't present, fall back to slab total. 
+ return mt - (mf + mb + mc + st) + } + return -1 } func memorySupportedPageSizes(hpDir string) ([]uint64, error) { diff --git a/pkg/option/option.go b/pkg/option/option.go index 7ce14016..2a82ca84 100644 --- a/pkg/option/option.go +++ b/pkg/option/option.go @@ -18,6 +18,7 @@ const ( const ( envKeyChroot = "GHW_CHROOT" + envKeyCollectUsage = "GHW_COLLECT_USAGE" envKeyDisableWarnings = "GHW_DISABLE_WARNINGS" envKeyDisableTools = "GHW_DISABLE_TOOLS" envKeySnapshotPath = "GHW_SNAPSHOT_PATH" @@ -109,6 +110,16 @@ func EnvOrDefaultTools() bool { return true } +// EnvOrDefaultCollectUsage returns true if the GHW_COLLECT_USAGE environment +// variable is set, which indicates ghw should collect current resource usage +// information, false otherwise. +func EnvOrDefaultCollectUsage() bool { + if _, exists := os.LookupEnv(envKeyCollectUsage); exists { + return true + } + return false +} + // Option is used to represent optionally-configured settings. Each field is a // pointer to some concrete value so that we can tell when something has been // set or left unset. @@ -122,6 +133,9 @@ type Option struct { // build its paths from that location instead of / Chroot *string + // CollectUsage informs ghw to collect current resource usage information + CollectUsage *bool + // Snapshot contains options for handling ghw snapshots Snapshot *SnapshotOptions @@ -168,6 +182,11 @@ func WithChroot(dir string) *Option { return &Option{Chroot: &dir} } +// WithCollectUsage tells ghw to collect current resource usage. 
+func WithCollectUsage(toggle bool) *Option { + return &Option{CollectUsage: &toggle} +} + // WithSnapshot sets snapshot-processing options for a ghw run func WithSnapshot(opts SnapshotOptions) *Option { return &Option{ @@ -217,6 +236,9 @@ func Merge(opts ...*Option) *Option { if opt.Chroot != nil { merged.Chroot = opt.Chroot } + if opt.CollectUsage != nil { + merged.CollectUsage = opt.CollectUsage + } if opt.Snapshot != nil { merged.Snapshot = opt.Snapshot } @@ -239,6 +261,10 @@ func Merge(opts ...*Option) *Option { chroot := EnvOrDefaultChroot() merged.Chroot = &chroot } + if merged.CollectUsage == nil { + enabled := EnvOrDefaultCollectUsage() + merged.CollectUsage = &enabled + } if merged.Alerter == nil { merged.Alerter = EnvOrDefaultAlerter() }