Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: don't duplicate AMD GPU temperature retrieval #1682

Merged
merged 4 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ That said, these are more guidelines rather than hardset rules, though the proje
- `--colors` is now `--theme`
- [#1513](https://github.com/ClementTsang/bottom/pull/1513): Table headers are now bold by default.
- [#1515](https://github.com/ClementTsang/bottom/pull/1515): Show the config path in the error message if unable to read/create a config.
- [#1682](https://github.com/ClementTsang/bottom/pull/1682): On Linux, temperature sensor labels now always have their first letter capitalized (e.g. "k10temp: tctl" -> "k10temp: Tctl").

### Bug Fixes

Expand Down
20 changes: 8 additions & 12 deletions src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ pub mod nvidia;
#[cfg(all(target_os = "linux", feature = "gpu"))]
pub mod amd;

#[cfg(target_os = "linux")]
mod linux {
pub mod utils;
}

#[cfg(feature = "battery")]
pub mod batteries;
pub mod cpu;
Expand Down Expand Up @@ -370,18 +375,9 @@ impl DataCollector {
}

#[cfg(target_os = "linux")]
if let Some(data) = amd::get_amd_vecs(
&self.filters.temp_filter,
&self.widgets_to_harvest,
self.last_collection_time,
) {
if let Some(mut temp) = data.temperature {
if let Some(sensors) = &mut self.data.temperature_sensors {
sensors.append(&mut temp);
} else {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(data) =
amd::get_amd_vecs(&self.widgets_to_harvest, self.last_collection_time)
{
if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem);
}
Expand Down
153 changes: 30 additions & 123 deletions src/collection/amd.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mod amdgpu_marketing;
mod amd_gpu_marketing;

use std::{
fs::{self, read_to_string},
Expand All @@ -10,30 +10,23 @@ use std::{

use hashbrown::{HashMap, HashSet};

use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
collection::{memory::MemData, temperature::TempSensorData},
};
use crate::{app::layout_manager::UsedWidgets, collection::memory::MemData};

use super::linux::utils::is_device_awake;

// TODO: May be able to clean up some of these, Option<Vec> for example is a bit redundant.
pub struct AMDGPUData {
pub struct AmdGpuData {
pub memory: Option<Vec<(String, MemData)>>,
pub temperature: Option<Vec<TempSensorData>>,
pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>,
}

pub struct AMDGPUMemory {
pub struct AmdGpuMemory {
pub total: u64,
pub used: u64,
}

pub struct AMDGPUTemperature {
pub name: String,
pub temperature: f32,
}

#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct AMDGPUProc {
pub struct AmdGpuProc {
pub vram_usage: u64,
pub gfx_usage: u64,
pub dma_usage: u64,
Expand All @@ -46,7 +39,7 @@ pub struct AMDGPUProc {
}

// needs previous state for usage calculation
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AMDGPUProc>>>> =
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AmdGpuProc>>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));

fn get_amd_devs() -> Option<Vec<PathBuf>> {
Expand All @@ -62,7 +55,18 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {

// test if it has a valid vendor path
let device_path = path.path();
let test_path = device_path.join("vendor");
if !device_path.is_dir() {
continue;
}

// Skip if asleep to avoid wakeups.
if !is_device_awake(&device_path) {
continue;
}

// This will exist for GPUs but not others, this is how we find their kernel
// name.
let test_path = device_path.join("drm");
if test_path.as_path().exists() {
devices.push(device_path);
}
Expand All @@ -75,7 +79,7 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
}
}

fn get_amd_name(device_path: &Path) -> Option<String> {
pub fn get_amd_name(device_path: &Path) -> Option<String> {
// get revision and device ids from sysfs
let rev_path = device_path.join("revision");
let dev_path = device_path.join("device");
Expand Down Expand Up @@ -107,13 +111,13 @@ fn get_amd_name(device_path: &Path) -> Option<String> {
}

// if it exists in our local database, use that name
amdgpu_marketing::AMDGPU_MARKETING_NAME
amd_gpu_marketing::AMD_GPU_MARKETING_NAME
.iter()
.find(|(did, rid, _)| (did, rid) == (&device_id, &revision_id))
.map(|tuple| tuple.2.to_string())
}

fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
fn get_amd_vram(device_path: &Path) -> Option<AmdGpuMemory> {
// get vram memory info from sysfs
let vram_total_path = device_path.join("mem_info_vram_total");
let vram_used_path = device_path.join("mem_info_vram_used");
Expand All @@ -136,93 +140,12 @@ fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
return None;
};

Some(AMDGPUMemory {
Some(AmdGpuMemory {
total: vram_total,
used: vram_used,
})
}

fn get_amd_temp(device_path: &Path) -> Option<Vec<AMDGPUTemperature>> {
let mut temperatures = Vec::new();

// get hardware monitoring sensor info from sysfs
let hwmon_root = device_path.join("hwmon");

let Ok(hwmon_paths) = fs::read_dir(hwmon_root) else {
return None;
};

for hwmon_dir in hwmon_paths {
let Ok(hwmon_dir) = hwmon_dir else {
continue;
};

let hwmon_binding = hwmon_dir.path();
let hwmon_path = hwmon_binding.as_path();
let Ok(hwmon_sensors) = fs::read_dir(hwmon_path) else {
continue;
};

for hwmon_sensor_ent in hwmon_sensors {
let Ok(hwmon_sensor_ent) = hwmon_sensor_ent else {
continue;
};

let hwmon_sensor_path = hwmon_sensor_ent.path();
let hwmon_sensor_binding = hwmon_sensor_ent.file_name();
let Some(hwmon_sensor_name) = hwmon_sensor_binding.to_str() else {
continue;
};

// temperature sensors are temp{number}_{input,label}
if !hwmon_sensor_name.starts_with("temp") || !hwmon_sensor_name.ends_with("_input") {
continue; // filename does not start with temp or ends with input
}

// construct label path
let hwmon_sensor_label_name = hwmon_sensor_name.replace("_input", "_label");
let hwmon_sensor_label_path = hwmon_path.join(hwmon_sensor_label_name);

// read and remove newlines
let Ok(mut hwmon_sensor_data) = read_to_string(hwmon_sensor_path) else {
continue;
};

let Ok(mut hwmon_sensor_label) = read_to_string(hwmon_sensor_label_path) else {
continue;
};

hwmon_sensor_data = hwmon_sensor_data.trim_end().to_string();
hwmon_sensor_label = hwmon_sensor_label.trim_end().to_string();

let Ok(hwmon_sensor) = hwmon_sensor_data.parse::<u64>() else {
continue;
};

// uppercase first character
if hwmon_sensor_label.is_ascii() {
let (hwmon_sensor_label_head, hwmon_sensor_label_tail) =
hwmon_sensor_label.split_at(1);

hwmon_sensor_label =
hwmon_sensor_label_head.to_uppercase() + hwmon_sensor_label_tail;
}

// 1 C is reported as 1000
temperatures.push(AMDGPUTemperature {
name: hwmon_sensor_label,
temperature: (hwmon_sensor as f32) / 1000.0f32,
});
}
}

if temperatures.is_empty() {
None
} else {
Some(temperatures)
}
}

// from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L114
fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 {
use std::ops::Mul;
Expand Down Expand Up @@ -300,7 +223,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option<Vec<PathBuf>> {
}
}

fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AmdGpuProc>> {
let mut fdinfo = HashMap::new();

let drm_paths = get_amdgpu_drm(device_path)?;
Expand Down Expand Up @@ -336,7 +259,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
continue;
};

let mut usage: AMDGPUProc = Default::default();
let mut usage: AmdGpuProc = Default::default();

let mut observed_ids: HashSet<usize> = HashSet::new();

Expand Down Expand Up @@ -401,20 +324,17 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
Some(fdinfo)
}

pub fn get_amd_vecs(
filter: &Option<Filter>, widgets_to_harvest: &UsedWidgets, prev_time: Instant,
) -> Option<AMDGPUData> {
pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Option<AmdGpuData> {
let device_path_list = get_amd_devs()?;
let interval = Instant::now().duration_since(prev_time);
let num_gpu = device_path_list.len();
let mut temp_vec = Vec::with_capacity(num_gpu);
let mut mem_vec = Vec::with_capacity(num_gpu);
let mut proc_vec = Vec::with_capacity(num_gpu);
let mut total_mem = 0;

for device_path in device_path_list {
let device_name =
get_amd_name(&device_path).unwrap_or(amdgpu_marketing::AMDGPU_DEFAULT_NAME.to_string());
let device_name = get_amd_name(&device_path)
.unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string());

if let Some(mem) = get_amd_vram(&device_path) {
if widgets_to_harvest.use_mem {
Expand All @@ -432,18 +352,6 @@ pub fn get_amd_vecs(
total_mem += mem.total
}

// TODO: Not sure if this overlaps with the existing generic temperature code.
if widgets_to_harvest.use_temp && Filter::optional_should_keep(filter, &device_name) {
if let Some(temperatures) = get_amd_temp(&device_path) {
for info in temperatures {
temp_vec.push(TempSensorData {
name: format!("{} {}", device_name, info.name),
temperature: Some(info.temperature),
});
}
}
}

if widgets_to_harvest.use_proc {
if let Some(procs) = get_amd_fdinfo(&device_path) {
let mut proc_info = PROC_DATA.lock().unwrap();
Expand Down Expand Up @@ -497,9 +405,8 @@ pub fn get_amd_vecs(
}
}

Some(AMDGPUData {
Some(AmdGpuData {
memory: (!mem_vec.is_empty()).then_some(mem_vec),
temperature: (!temp_vec.is_empty()).then_some(temp_vec),
procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)),
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

pub const AMDGPU_DEFAULT_NAME: &str = "AMD Radeon Graphics";

pub const AMDGPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
pub const AMD_GPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
(0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"),
(0x6799, 0x00, "AMD Radeon HD 7900 Series"),
(0x679A, 0x00, "AMD Radeon HD 7900 Series"),
Expand Down
30 changes: 30 additions & 0 deletions src/collection/linux/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use std::{fs, path::Path};

/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
///
/// `path` is a path to the device itself (e.g. `/sys/class/hwmon/hwmon1/device`).
#[inline]
pub fn is_device_awake(device: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}
4 changes: 2 additions & 2 deletions src/collection/temperature.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Data collection for temperature metrics.
//!
//! For Linux and macOS, this is handled by Heim.
//! For Windows, this is handled by sysinfo.
//! For Linux, this is handled by custom code.
//! For everything else, this is handled by sysinfo.

cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {
Expand Down
Loading
Loading