Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aarch64: Support FEAT_LSFE #201

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ lcgr
ldar
ldaxp
ldclrp
ldfadd
ldfmax
ldfmin
ldiapp
ldrexd
ldsetp
Expand Down Expand Up @@ -188,6 +191,7 @@ versatilepb
virt
vmlinux
vmovdqa
vreg
vtable
vtables
wokwi
Expand Down
24 changes: 15 additions & 9 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@ fn main() {

if version.minor >= 80 {
println!(
r#"cargo:rustc-check-cfg=cfg(target_feature,values("experimental-zacas","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
r#"cargo:rustc-check-cfg=cfg(target_feature,values("lsfe","experimental-zacas","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
);

// Custom cfgs set by build script. Not public API.
// grep -F 'cargo:rustc-cfg=' build.rs | grep -Ev '^ *//' | sed -E 's/^.*cargo:rustc-cfg=//; s/(=\\)?".*$//' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
println!(
"cargo:rustc-check-cfg=cfg(portable_atomic_disable_fiq,portable_atomic_force_amo,portable_atomic_ll_sc_rmw,portable_atomic_new_atomic_intrinsics,portable_atomic_no_asm,portable_atomic_no_asm_maybe_uninit,portable_atomic_no_atomic_64,portable_atomic_no_atomic_cas,portable_atomic_no_atomic_load_store,portable_atomic_no_atomic_min_max,portable_atomic_no_cfg_target_has_atomic,portable_atomic_no_cmpxchg16b_intrinsic,portable_atomic_no_cmpxchg16b_target_feature,portable_atomic_no_const_mut_refs,portable_atomic_no_const_raw_ptr_deref,portable_atomic_no_const_transmute,portable_atomic_no_core_unwind_safe,portable_atomic_no_diagnostic_namespace,portable_atomic_no_offset_of,portable_atomic_no_strict_provenance,portable_atomic_no_stronger_failure_ordering,portable_atomic_no_track_caller,portable_atomic_no_unsafe_op_in_unsafe_fn,portable_atomic_pre_llvm_15,portable_atomic_pre_llvm_16,portable_atomic_pre_llvm_18,portable_atomic_s_mode,portable_atomic_sanitize_thread,portable_atomic_target_feature,portable_atomic_unsafe_assume_single_core,portable_atomic_unstable_asm,portable_atomic_unstable_asm_experimental_arch,portable_atomic_unstable_cfg_target_has_atomic,portable_atomic_unstable_isa_attribute)"
"cargo:rustc-check-cfg=cfg(portable_atomic_disable_fiq,portable_atomic_force_amo,portable_atomic_ll_sc_rmw,portable_atomic_new_atomic_intrinsics,portable_atomic_no_asm,portable_atomic_no_asm_maybe_uninit,portable_atomic_no_atomic_64,portable_atomic_no_atomic_cas,portable_atomic_no_atomic_load_store,portable_atomic_no_atomic_min_max,portable_atomic_no_cfg_target_has_atomic,portable_atomic_no_cmpxchg16b_intrinsic,portable_atomic_no_cmpxchg16b_target_feature,portable_atomic_no_const_mut_refs,portable_atomic_no_const_raw_ptr_deref,portable_atomic_no_const_transmute,portable_atomic_no_core_unwind_safe,portable_atomic_no_diagnostic_namespace,portable_atomic_no_offset_of,portable_atomic_no_strict_provenance,portable_atomic_no_stronger_failure_ordering,portable_atomic_no_track_caller,portable_atomic_no_unsafe_op_in_unsafe_fn,portable_atomic_pre_llvm_15,portable_atomic_pre_llvm_16,portable_atomic_pre_llvm_18,portable_atomic_pre_llvm_20,portable_atomic_s_mode,portable_atomic_sanitize_thread,portable_atomic_target_feature,portable_atomic_unsafe_assume_single_core,portable_atomic_unstable_asm,portable_atomic_unstable_asm_experimental_arch,portable_atomic_unstable_cfg_target_has_atomic,portable_atomic_unstable_isa_attribute)"
);
// TODO: handle multi-line target_feature_fallback
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
println!(
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha"))"#
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","lsfe","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha"))"#
);
}

Expand Down Expand Up @@ -206,12 +206,15 @@ fn main() {
println!("cargo:rustc-cfg=portable_atomic_no_atomic_load_store");
}

if version.llvm < 18 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_18");
if version.llvm < 16 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_16");
if version.llvm < 15 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_15");
if version.llvm < 20 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_20");
if version.llvm < 18 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_18");
if version.llvm < 16 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_16");
if version.llvm < 15 {
println!("cargo:rustc-cfg=portable_atomic_pre_llvm_15");
}
}
}
}
Expand Down Expand Up @@ -282,6 +285,9 @@ fn main() {
target_feature_fallback("lse", lse);
}
}
// As of rustc 1.84, target_feature "lsfe" is not available on the rustc side:
// https://github.com/rust-lang/rust/blob/1.84.0/compiler/rustc_target/src/target_features.rs
target_feature_fallback("lsfe", false);

// As of Apple M1/M1 Pro, on Apple hardware, CAS-loop-based RMW is much slower than
// LL/SC-loop-based RMW: https://github.com/taiki-e/portable-atomic/pull/89
Expand Down
8 changes: 4 additions & 4 deletions src/imp/atomic128/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,10 @@ macro_rules! atomic_rmw_inst {
};
($op:ident, $order:ident, write = $write:ident) => {
match $order {
Ordering::Relaxed => $op!("2", ""),
Ordering::Acquire => $op!("a", ""),
Ordering::Release => $op!("6", ""),
Ordering::AcqRel => $op!("e", ""),
Ordering::Relaxed => $op!("2", ""), // ""
Ordering::Acquire => $op!("a", ""), // "a"
Ordering::Release => $op!("6", ""), // "l"
Ordering::AcqRel => $op!("e", ""), // "al"
// In MSVC environments, SeqCst stores/writes need fences after writes.
// https://reviews.llvm.org/D141748
#[cfg(target_env = "msvc")]
Expand Down
60 changes: 55 additions & 5 deletions src/imp/detect/aarch64_aa64reg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@ include!("common.rs");
// Raw AArch64 ID register values consumed by `_detect`.
// Every `imp::aa64reg()` backend returns one of these; `_detect` then pulls
// individual bit fields out of each value with `extract`.
struct AA64Reg {
// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0.
aa64isar0: u64,
// ID_AA64ISAR1_EL1, AArch64 Instruction Set Attribute Register 1
// (`_detect` checks bits 23:20 of it for RCPC3).
aa64isar1: u64,
// ID_AA64ISAR3_EL1, AArch64 Instruction Set Attribute Register 3.
// Only present in test builds: `_detect` checks bits 19:16 of it for FEAT_LSFE,
// and that check is itself `#[cfg(test)]`. The sysctl-based backends visible
// in this file fill it with 0 rather than reading the register.
#[cfg(test)]
aa64isar3: u64,
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
// (`_detect` checks bits 35:32 of it).
aa64mmfr2: u64,
}

#[cold]
fn _detect(info: &mut CpuInfo) {
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
let AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3,
aa64mmfr2,
} = imp::aa64reg();

// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0
Expand All @@ -65,6 +73,12 @@ fn _detect(info: &mut CpuInfo) {
if extract(aa64isar1, 23, 20) >= 0b0011 {
info.set(CpuInfo::HAS_RCPC3);
}
#[cfg(test)]
// ID_AA64ISAR3_EL1, AArch64 Instruction Set Attribute Register 3
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR3-EL1--AArch64-Instruction-Set-Attribute-Register-3
if extract(aa64isar3, 19, 16) >= 0b0001 {
info.set(CpuInfo::HAS_LSFE);
}
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2
if extract(aa64mmfr2, 35, 32) >= 0b0001 {
Expand Down Expand Up @@ -102,13 +116,27 @@ mod imp {
out(reg) aa64isar1,
options(pure, nomem, nostack, preserves_flags),
);
#[cfg(test)]
let aa64isar3: u64;
#[cfg(test)]
asm!(
"mrs {0}, ID_AA64ISAR3_EL1",
out(reg) aa64isar3,
options(pure, nomem, nostack, preserves_flags),
);
let aa64mmfr2: u64;
asm!(
"mrs {0}, ID_AA64MMFR2_EL1",
out(reg) aa64mmfr2,
options(pure, nomem, nostack, preserves_flags),
);
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3,
aa64mmfr2,
}
}
}
}
Expand Down Expand Up @@ -200,6 +228,8 @@ mod imp {
Some(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
Expand All @@ -213,7 +243,13 @@ mod imp {
// https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
match sysctl_cpu_id(c!("machdep.cpu0.cpu_id")) {
Some(cpu_id) => cpu_id,
None => AA64Reg { aa64isar0: 0, aa64isar1: 0, aa64mmfr2: 0 },
None => AA64Reg {
aa64isar0: 0,
aa64isar1: 0,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: 0,
},
}
}
}
Expand Down Expand Up @@ -273,7 +309,13 @@ mod imp {
let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0);
let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0);
let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0);
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2,
}
}

fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
Expand Down Expand Up @@ -322,9 +364,10 @@ mod tests {

#[test]
fn test_aa64reg() {
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
std::eprintln!("aa64isar0={}", aa64isar0);
std::eprintln!("aa64isar1={}", aa64isar1);
std::eprintln!("aa64isar3={}", aa64isar3);
std::eprintln!("aa64mmfr2={}", aa64mmfr2);
if cfg!(target_os = "openbsd") {
let output = Command::new("sysctl").arg("machdep").output().unwrap();
Expand Down Expand Up @@ -361,6 +404,12 @@ mod tests {
} else {
assert!(lrcpc < 0b0011, "{}", lrcpc);
}
let lsfe = extract(aa64isar3, 19, 16);
if detect().test(CpuInfo::HAS_LSFE) {
assert_eq!(lsfe, 0b0001);
} else {
assert_eq!(lsfe, 0b0000);
}
let at = extract(aa64mmfr2, 35, 32);
if detect().test(CpuInfo::HAS_LSE2) {
assert_eq!(at, 0b0001);
Expand Down Expand Up @@ -496,6 +545,7 @@ mod tests {
Ok(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
Expand Down
7 changes: 7 additions & 0 deletions src/imp/detect/aarch64_apple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ fn _detect(info: &mut CpuInfo) {
if sysctlbyname32(c!("hw.optional.arm.FEAT_LSE128")).unwrap_or(0) != 0 {
info.set(CpuInfo::HAS_LSE128);
}
#[cfg(test)]
if sysctlbyname32(c!("hw.optional.arm.FEAT_LSFE")).unwrap_or(0) != 0 {
info.set(CpuInfo::HAS_LSFE);
}
if sysctlbyname32(c!("hw.optional.arm.FEAT_LRCPC3")).unwrap_or(0) != 0 {
info.set(CpuInfo::HAS_RCPC3);
}
Expand All @@ -108,6 +112,8 @@ mod tests {
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LSE2")), Some(1));
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LSE128")), None);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound);
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LSFE")), None);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound);
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LRCPC")), Some(1));
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LRCPC2")), Some(1));
assert_eq!(sysctlbyname32(c!("hw.optional.arm.FEAT_LRCPC3")), None);
Expand Down Expand Up @@ -234,6 +240,7 @@ mod tests {
c!("hw.optional.arm.FEAT_LSE"),
c!("hw.optional.arm.FEAT_LSE2"),
c!("hw.optional.arm.FEAT_LSE128"),
c!("hw.optional.arm.FEAT_LSFE"),
c!("hw.optional.arm.FEAT_LRCPC"),
c!("hw.optional.arm.FEAT_LRCPC2"),
c!("hw.optional.arm.FEAT_LRCPC3"),
Expand Down
12 changes: 12 additions & 0 deletions src/imp/detect/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ flags! {
// > If FEAT_LSE128 is implemented, then FEAT_LSE is implemented.
#[cfg_attr(not(test), allow(dead_code))]
HAS_LSE128(has_lse128, "lse128", any(target_feature, portable_atomic_target_feature)),
// FEAT_LSFE, Large System Float Extension
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSFE is OPTIONAL from Armv9.3.
// > If FEAT_LSFE is implemented, then FEAT_FP is implemented.
#[cfg(test)]
HAS_LSFE(has_lsfe, "lsfe", any(target_feature, portable_atomic_target_feature)),
}

#[cfg(target_arch = "powerpc64")]
Expand Down Expand Up @@ -398,6 +405,11 @@ mod tests_common {
assert!(!lse128);
}
}
if detect().has_lsfe() {
assert!(detect().test(CpuInfo::HAS_LSFE));
} else {
assert!(!detect().test(CpuInfo::HAS_LSFE));
}
if detect().has_rcpc3() {
assert!(detect().test(CpuInfo::HAS_RCPC3));
if let Ok(test_helper::cpuinfo::ProcCpuinfo { rcpc3: Some(rcpc3), .. }) = proc_cpuinfo {
Expand Down
Loading
Loading