Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #126 - report total memory and per-job rss #131

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sonar"
version = "0.7.0"
version = "0.8.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ the job, one per host; see `job` below.
`cores` (optional, default "0"): The number of cores on this host, a nonnegative integer, with 0
meaning "unknown".

`memtotalkib` (optional, default "0"): The amount of physical RAM on this host in KiB, a nonnegative
integer, with 0 meaning "unknown".

`user` (required): The local Unix user name of the user owning the job, an alphanumeric string. This
can also be `_zombie_<pid>` for zombie processes, where `<pid>` is the process ID of the process.

Expand All @@ -181,7 +184,11 @@ holds the process ID.
process (ie computed independently of the sonar log), a nonnegative floating-point number. 100.0
corresponds to "one full core's worth of computation".

`cpukib` (optional, default "0"): The current CPU memory used in KiB, a nonnegative integer.
`cpukib` (optional, default "0"): The current CPU data virtual memory used in KiB, a nonnegative
integer.

`residentkib` (optional, default "0"): The current CPU data resident memory in KiB, a nonnegative
integer.

`gpus` (optional, default "none"): The list of GPUs currently used by the job, a comma-separated
list of GPU device numbers, all of them nonnegative integers. The value can instead be `none` when
Expand Down
696 changes: 351 additions & 345 deletions src/process.rs

Large diffs are not rendered by default.

129 changes: 72 additions & 57 deletions src/procfs.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
//! Collect CPU process information without GPU information, from files in /proc.

use crate::process;
use crate::procfsapi;
use crate::procfsapi::{self, parse_usize_field};

use std::collections::HashMap;

/// Look up the total installed memory as reported by the `MemTotal` line of /proc/meminfo.
///
/// The file is obtained through `fs`, so the caller decides whether the real /proc or a mock is
/// consulted. The returned number is the value found on the `MemTotal` line (the line labels it
/// `kB`; this code treats it as KiB).
///
/// # Errors
///
/// Returns an error string if the file cannot be read, if the `MemTotal` line does not consist of
/// exactly three whitespace-separated fields ending in `kB`, if the value is not a valid `usize`,
/// or if no `MemTotal` line is present (a value of zero is reported the same way).

pub fn get_memtotal_kib(fs: &dyn procfsapi::ProcfsAPI) -> Result<usize, String> {
    let meminfo_s = fs.read_to_string("meminfo")?;

    // Only the first matching line is ever considered.
    let total_kib = match meminfo_s
        .split('\n')
        .find(|line| line.starts_with("MemTotal: "))
    {
        Some(line) => {
            // The line should look like "MemTotal:\s+(\d+)\s+kB", more or less.
            let parts: Vec<&str> = line.split_ascii_whitespace().collect();
            if parts.len() != 3 || parts[2] != "kB" {
                return Err(format!("Unexpected MemTotal in /proc/meminfo: {line}"));
            }
            parse_usize_field(&parts, 1, line, "meminfo", 0, "MemTotal")?
        }
        None => 0,
    };

    // Zero doubles as the "not found" marker.
    if total_kib == 0 {
        return Err(format!("Could not find MemTotal in /proc/meminfo: {meminfo_s}"));
    }
    Ok(total_kib)
}

/// Obtain process information via /proc and return a vector of structures with all the information
/// we need. In the returned vector, pids uniquely tag the records.
///
Expand All @@ -17,7 +39,7 @@ use std::collections::HashMap;
/// The underlying computing system -- /proc, system tables, and clock -- is virtualized through the
/// ProcfsAPI instance.

pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result<Vec<process::Process>, String> {
pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI, memtotal_kib: usize) -> Result<Vec<process::Process>, String> {
// The boot time is the `btime` field of /proc/stat. It is measured in seconds since epoch. We
// need this to compute the process's real time, which we need to compute ps-compatible cpu
// utilization.
Expand All @@ -35,28 +57,6 @@ pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result<Vec<proc
return Err(format!("Could not find btime in /proc/stat: {stat_s}"));
}

// The total RAM installed is in the `MemTotal` field of /proc/meminfo. We need this to compute
// ps-compatible relative memory use.

let mut memtotal_kib = 0;
let meminfo_s = fs.read_to_string("meminfo")?;
for l in meminfo_s.split('\n') {
if l.starts_with("MemTotal: ") {
// We expect "MemTotal:\s+(\d+)\s+kB", roughly
let fields = l.split_ascii_whitespace().collect::<Vec<&str>>();
if fields.len() != 3 || fields[2] != "kB" {
return Err(format!("Unexpected MemTotal in /proc/meminfo: {l}"));
}
memtotal_kib = parse_usize_field(&fields, 1, l, "meminfo", 0, "MemTotal")?;
break;
}
}
if memtotal_kib == 0 {
return Err(format!(
"Could not find MemTotal in /proc/meminfo: {meminfo_s}"
));
}

// Enumerate all pids, and collect the uids while we're here.
//
// Just ignore dirents that cause trouble, there wouldn't normally be any in proc, but if there
Expand Down Expand Up @@ -193,6 +193,31 @@ pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result<Vec<proc
continue;
}

// The best value for resident memory is probably RssAnon of /proc/{pid}/status, which
// corresponds to "private data". It does not include text or file mappings, though these
// actually also take up real memory. But text can be shared, which matters both when we
// roll up processes and when the program is executing multiple times on the system, and
// file mappings, when they exist, are frequently read-only and evictable.
let mut resident_kib = 0;
if let Ok(status_info) = fs.read_to_string(&format!("{pid}/status")) {
for l in status_info.split('\n') {
if l.starts_with("RssAnon:") {
// We expect "RssAnon:\s+(\d+)\s+kB", roughly; there may be tabs.
let fields = l.split_ascii_whitespace().collect::<Vec<&str>>();
if fields.len() != 3 || fields[2] != "kB" {
return Err(format!("Unexpected RssAnon in /proc/{pid}/status: {l}"));
}
resident_kib = parse_usize_field(&fields, 1, &l, "status", pid, "private resident set size")?
* kib_per_page;
break;
}
}
}
if resident_kib == 0 {
// This is *usually* benign - see above.
continue;
}

// Now compute some derived quantities.

// pcpu and pmem are rounded to ##.#. We're going to get slightly different answers here
Expand Down Expand Up @@ -223,6 +248,7 @@ pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result<Vec<proc
mem_pct: pmem,
cputime_sec,
mem_size_kib: size_kib,
resident_kib,
ppid,
session: sess,
command: comm,
Expand All @@ -232,37 +258,6 @@ pub fn get_process_information(fs: &dyn procfsapi::ProcfsAPI) -> Result<Vec<proc
Ok(result)
}

// Parse the element of `fields` at index `ix` as a usize.
//
// `line`, `file`, `pid` and `fieldname` are used only to build the error text.  A `pid` of zero
// selects the "/proc/{file}" form of the message, any other value the "/proc/{pid}/{file}" form.
//
// Returns Err with a descriptive message if `ix` is out of range for `fields` or if the selected
// element is not a valid usize.
fn parse_usize_field(
    fields: &[&str],
    ix: usize,
    line: &str,
    file: &str,
    pid: usize,
    fieldname: &str,
) -> Result<usize, String> {
    let text = match fields.get(ix) {
        Some(text) => text,
        None => {
            return Err(match pid {
                0 => format!("Index out of range for /proc/{file}: {ix}: {line}"),
                _ => format!("Index out of range for /proc/{pid}/{file}: {ix}: {line}"),
            });
        }
    };
    text.parse::<usize>().map_err(|_| match pid {
        0 => format!("Could not parse {fieldname} in /proc/{file}: {line}"),
        _ => format!("Could not parse {fieldname} from /proc/{pid}/{file}: {line}"),
    })
}

// The UserTable optimizes uid -> name lookup.

struct UserTable {
Expand Down Expand Up @@ -382,6 +377,10 @@ DirectMap1G: 11534336 kB
"4018/statm".to_string(),
"1255967 185959 54972 200 0 316078 0".to_string(),
);
files.insert(
"4018/status".to_string(),
"RssAnon: 12345 kB".to_string(),
);

let ticks_per_sec = 100.0; // We define this
let utime_ticks = 51361.0; // field(/proc/4018/stat, 14)
Expand All @@ -391,12 +390,14 @@ DirectMap1G: 11534336 kB
let rss: f64 = 185959.0 * 4.0; // pages_to_kib(field(/proc/4018/statm, 1))
let memtotal = 16093776.0; // field(/proc/meminfo, "MemTotal:")
let size = 316078 * 4; // pages_to_kib(field(/proc/4018/statm, 5))
let resident = 12345 * 4; // pages_to_kib(field(/proc/4018/status, "RssAnon:"))

// now = boot_time + start_time + utime_ticks + stime_ticks + arbitrary idle time
let now = (boot_time + (start_ticks / ticks_per_sec) + (utime_ticks / ticks_per_sec) + (stime_ticks / ticks_per_sec) + 2000.0) as u64;

let fs = procfsapi::MockFS::new(files, pids, users, now);
let info = get_process_information(&fs).unwrap();
let memtotal_kib = get_memtotal_kib(&fs).unwrap();
let info = get_process_information(&fs, memtotal_kib).unwrap();
assert!(info.len() == 1);
let p = &info[0];
assert!(p.pid == 4018); // from enumeration of /proc
Expand All @@ -418,6 +419,7 @@ DirectMap1G: 11534336 kB
assert!(p.mem_pct == mem_pct);

assert!(p.mem_size_kib == size);
assert!(p.resident_kib == resident);
}

#[test]
Expand Down Expand Up @@ -452,9 +454,22 @@ pub fn procfs_dead_and_undead_test() {
"4020/statm".to_string(),
"1255967 185959 54972 200 0 316078 0".to_string(),
);
files.insert(
"4018/status".to_string(),
"RssAnon: 12345 kB".to_string(),
);
files.insert(
"4019/status".to_string(),
"RssAnon: 12345 kB".to_string(),
);
files.insert(
"4020/status".to_string(),
"RssAnon: 12345 kB".to_string(),
);

let fs = procfsapi::MockFS::new(files, pids, users, procfsapi::unix_now());
let info = get_process_information(&fs).unwrap();
let memtotal_kib = get_memtotal_kib(&fs).unwrap();
let info = get_process_information(&fs, memtotal_kib).unwrap();

// 4020 should be dropped - it's dead
assert!(info.len() == 2);
Expand Down
31 changes: 31 additions & 0 deletions src/procfsapi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,37 @@ pub fn unix_now() -> u64 {
.as_secs()
}

/// Parse `fields[ix]` as a `usize`, producing a descriptive error string on failure.
///
/// `line` is the text the fields were split from, `file` the name of the /proc file it came from,
/// and `fieldname` a human-readable name for the value; all three appear only in error messages.
/// When `pid` is zero the messages refer to `/proc/{file}`, otherwise to `/proc/{pid}/{file}`.
///
/// # Errors
///
/// Fails if `ix` is not a valid index into `fields`, or if the selected field does not parse as a
/// `usize`.
pub fn parse_usize_field(
    fields: &[&str],
    ix: usize,
    line: &str,
    file: &str,
    pid: usize,
    fieldname: &str,
) -> Result<usize, String> {
    let system_wide = pid == 0;

    let text = fields.get(ix).ok_or_else(|| {
        if system_wide {
            format!("Index out of range for /proc/{file}: {ix}: {line}")
        } else {
            format!("Index out of range for /proc/{pid}/{file}: {ix}: {line}")
        }
    })?;

    match text.parse::<usize>() {
        Ok(value) => Ok(value),
        Err(_) if system_wide => Err(format!(
            "Could not parse {fieldname} in /proc/{file}: {line}"
        )),
        Err(_) => Err(format!(
            "Could not parse {fieldname} from /proc/{pid}/{file}: {line}"
        )),
    }
}

// MockFS is used for testing, it is instantiated with the values we want it to return.

#[cfg(test)]
Expand Down
Loading
Loading