Skip to content

Commit

Permalink
refactor: don't duplicate AMD GPU temperature retrieval (#1682)
Browse files Browse the repository at this point in the history
* some file/struct renaming

* refactor: don't get AMD gpu temperatures twice
  • Loading branch information
ClementTsang authored Feb 23, 2025
1 parent f7d070f commit d2177ed
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 202 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ That said, these are more guidelines rather than hardset rules, though the proje
- `--colors` is now `--theme`
- [#1513](https://github.com/ClementTsang/bottom/pull/1513): Table headers are now bold by default.
- [#1515](https://github.com/ClementTsang/bottom/pull/1515): Show the config path in the error message if unable to read/create a config.
- [#1682](https://github.com/ClementTsang/bottom/pull/1682): On Linux, temperature sensor labels now always have their first letter capitalized (e.g. "k10temp: tctl" -> "k10temp: Tctl").

### Bug Fixes

Expand Down
20 changes: 8 additions & 12 deletions src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ pub mod nvidia;
#[cfg(all(target_os = "linux", feature = "gpu"))]
pub mod amd;

#[cfg(target_os = "linux")]
mod linux {
pub mod utils;
}

#[cfg(feature = "battery")]
pub mod batteries;
pub mod cpu;
Expand Down Expand Up @@ -370,18 +375,9 @@ impl DataCollector {
}

#[cfg(target_os = "linux")]
if let Some(data) = amd::get_amd_vecs(
&self.filters.temp_filter,
&self.widgets_to_harvest,
self.last_collection_time,
) {
if let Some(mut temp) = data.temperature {
if let Some(sensors) = &mut self.data.temperature_sensors {
sensors.append(&mut temp);
} else {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(data) =
amd::get_amd_vecs(&self.widgets_to_harvest, self.last_collection_time)
{
if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem);
}
Expand Down
153 changes: 30 additions & 123 deletions src/collection/amd.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mod amdgpu_marketing;
mod amd_gpu_marketing;

use std::{
fs::{self, read_to_string},
Expand All @@ -10,30 +10,23 @@ use std::{

use hashbrown::{HashMap, HashSet};

use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
collection::{memory::MemData, temperature::TempSensorData},
};
use crate::{app::layout_manager::UsedWidgets, collection::memory::MemData};

use super::linux::utils::is_device_awake;

// TODO: May be able to clean up some of these, Option<Vec> for example is a bit redundant.
pub struct AMDGPUData {
pub struct AmdGpuData {
pub memory: Option<Vec<(String, MemData)>>,
pub temperature: Option<Vec<TempSensorData>>,
pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>,
}

pub struct AMDGPUMemory {
pub struct AmdGpuMemory {
pub total: u64,
pub used: u64,
}

pub struct AMDGPUTemperature {
pub name: String,
pub temperature: f32,
}

#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct AMDGPUProc {
pub struct AmdGpuProc {
pub vram_usage: u64,
pub gfx_usage: u64,
pub dma_usage: u64,
Expand All @@ -46,7 +39,7 @@ pub struct AMDGPUProc {
}

// needs previous state for usage calculation
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AMDGPUProc>>>> =
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AmdGpuProc>>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));

fn get_amd_devs() -> Option<Vec<PathBuf>> {
Expand All @@ -62,7 +55,18 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {

// test if it has a valid vendor path
let device_path = path.path();
let test_path = device_path.join("vendor");
if !device_path.is_dir() {
continue;
}

// Skip if asleep to avoid wakeups.
if !is_device_awake(&device_path) {
continue;
}

// This will exist for GPUs but not others, this is how we find their kernel
// name.
let test_path = device_path.join("drm");
if test_path.as_path().exists() {
devices.push(device_path);
}
Expand All @@ -75,7 +79,7 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
}
}

fn get_amd_name(device_path: &Path) -> Option<String> {
pub fn get_amd_name(device_path: &Path) -> Option<String> {
// get revision and device ids from sysfs
let rev_path = device_path.join("revision");
let dev_path = device_path.join("device");
Expand Down Expand Up @@ -107,13 +111,13 @@ fn get_amd_name(device_path: &Path) -> Option<String> {
}

// if it exists in our local database, use that name
amdgpu_marketing::AMDGPU_MARKETING_NAME
amd_gpu_marketing::AMD_GPU_MARKETING_NAME
.iter()
.find(|(did, rid, _)| (did, rid) == (&device_id, &revision_id))
.map(|tuple| tuple.2.to_string())
}

fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
fn get_amd_vram(device_path: &Path) -> Option<AmdGpuMemory> {
// get vram memory info from sysfs
let vram_total_path = device_path.join("mem_info_vram_total");
let vram_used_path = device_path.join("mem_info_vram_used");
Expand All @@ -136,93 +140,12 @@ fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
return None;
};

Some(AMDGPUMemory {
Some(AmdGpuMemory {
total: vram_total,
used: vram_used,
})
}

fn get_amd_temp(device_path: &Path) -> Option<Vec<AMDGPUTemperature>> {
let mut temperatures = Vec::new();

// get hardware monitoring sensor info from sysfs
let hwmon_root = device_path.join("hwmon");

let Ok(hwmon_paths) = fs::read_dir(hwmon_root) else {
return None;
};

for hwmon_dir in hwmon_paths {
let Ok(hwmon_dir) = hwmon_dir else {
continue;
};

let hwmon_binding = hwmon_dir.path();
let hwmon_path = hwmon_binding.as_path();
let Ok(hwmon_sensors) = fs::read_dir(hwmon_path) else {
continue;
};

for hwmon_sensor_ent in hwmon_sensors {
let Ok(hwmon_sensor_ent) = hwmon_sensor_ent else {
continue;
};

let hwmon_sensor_path = hwmon_sensor_ent.path();
let hwmon_sensor_binding = hwmon_sensor_ent.file_name();
let Some(hwmon_sensor_name) = hwmon_sensor_binding.to_str() else {
continue;
};

// temperature sensors are temp{number}_{input,label}
if !hwmon_sensor_name.starts_with("temp") || !hwmon_sensor_name.ends_with("_input") {
continue; // filename does not start with temp or ends with input
}

// construct label path
let hwmon_sensor_label_name = hwmon_sensor_name.replace("_input", "_label");
let hwmon_sensor_label_path = hwmon_path.join(hwmon_sensor_label_name);

// read and remove newlines
let Ok(mut hwmon_sensor_data) = read_to_string(hwmon_sensor_path) else {
continue;
};

let Ok(mut hwmon_sensor_label) = read_to_string(hwmon_sensor_label_path) else {
continue;
};

hwmon_sensor_data = hwmon_sensor_data.trim_end().to_string();
hwmon_sensor_label = hwmon_sensor_label.trim_end().to_string();

let Ok(hwmon_sensor) = hwmon_sensor_data.parse::<u64>() else {
continue;
};

// uppercase first character
if hwmon_sensor_label.is_ascii() {
let (hwmon_sensor_label_head, hwmon_sensor_label_tail) =
hwmon_sensor_label.split_at(1);

hwmon_sensor_label =
hwmon_sensor_label_head.to_uppercase() + hwmon_sensor_label_tail;
}

// 1 C is reported as 1000
temperatures.push(AMDGPUTemperature {
name: hwmon_sensor_label,
temperature: (hwmon_sensor as f32) / 1000.0f32,
});
}
}

if temperatures.is_empty() {
None
} else {
Some(temperatures)
}
}

// from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L114
fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 {
use std::ops::Mul;
Expand Down Expand Up @@ -300,7 +223,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option<Vec<PathBuf>> {
}
}

fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AmdGpuProc>> {
let mut fdinfo = HashMap::new();

let drm_paths = get_amdgpu_drm(device_path)?;
Expand Down Expand Up @@ -336,7 +259,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
continue;
};

let mut usage: AMDGPUProc = Default::default();
let mut usage: AmdGpuProc = Default::default();

let mut observed_ids: HashSet<usize> = HashSet::new();

Expand Down Expand Up @@ -401,20 +324,17 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
Some(fdinfo)
}

pub fn get_amd_vecs(
filter: &Option<Filter>, widgets_to_harvest: &UsedWidgets, prev_time: Instant,
) -> Option<AMDGPUData> {
pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Option<AmdGpuData> {
let device_path_list = get_amd_devs()?;
let interval = Instant::now().duration_since(prev_time);
let num_gpu = device_path_list.len();
let mut temp_vec = Vec::with_capacity(num_gpu);
let mut mem_vec = Vec::with_capacity(num_gpu);
let mut proc_vec = Vec::with_capacity(num_gpu);
let mut total_mem = 0;

for device_path in device_path_list {
let device_name =
get_amd_name(&device_path).unwrap_or(amdgpu_marketing::AMDGPU_DEFAULT_NAME.to_string());
let device_name = get_amd_name(&device_path)
.unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string());

if let Some(mem) = get_amd_vram(&device_path) {
if widgets_to_harvest.use_mem {
Expand All @@ -432,18 +352,6 @@ pub fn get_amd_vecs(
total_mem += mem.total
}

// TODO: Not sure if this overlaps with the existing generic temperature code.
if widgets_to_harvest.use_temp && Filter::optional_should_keep(filter, &device_name) {
if let Some(temperatures) = get_amd_temp(&device_path) {
for info in temperatures {
temp_vec.push(TempSensorData {
name: format!("{} {}", device_name, info.name),
temperature: Some(info.temperature),
});
}
}
}

if widgets_to_harvest.use_proc {
if let Some(procs) = get_amd_fdinfo(&device_path) {
let mut proc_info = PROC_DATA.lock().unwrap();
Expand Down Expand Up @@ -497,9 +405,8 @@ pub fn get_amd_vecs(
}
}

Some(AMDGPUData {
Some(AmdGpuData {
memory: (!mem_vec.is_empty()).then_some(mem_vec),
temperature: (!temp_vec.is_empty()).then_some(temp_vec),
procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)),
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

pub const AMDGPU_DEFAULT_NAME: &str = "AMD Radeon Graphics";

pub const AMDGPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
pub const AMD_GPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
(0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"),
(0x6799, 0x00, "AMD Radeon HD 7900 Series"),
(0x679A, 0x00, "AMD Radeon HD 7900 Series"),
Expand Down
30 changes: 30 additions & 0 deletions src/collection/linux/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use std::{fs, path::Path};

/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
///
/// `path` is a path to the device itself (e.g. `/sys/class/hwmon/hwmon1/device`).
#[inline]
pub fn is_device_awake(device: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}
4 changes: 2 additions & 2 deletions src/collection/temperature.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Data collection for temperature metrics.
//!
//! For Linux and macOS, this is handled by Heim.
//! For Windows, this is handled by sysinfo.
//! For Linux, this is handled by custom code.
//! For everything else, this is handled by sysinfo.
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
Expand Down
Loading

0 comments on commit d2177ed

Please sign in to comment.