From 27cf264f675e0ba01939c5a4505aa823a427ff90 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sun, 12 Jan 2025 15:54:38 -0500 Subject: [PATCH] Enforce compiler-builtins CGU partitioning in the compiler --- .../src/cross_crate_inline.rs | 9 +++++++++ compiler/rustc_monomorphize/src/partitioning.rs | 17 +++++++++++++++++ library/Cargo.toml | 13 ------------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs index 8fce856687cb8..9e5d4a436584d 100644 --- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs +++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs @@ -34,6 +34,15 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { return true; } + // compiler-builtins only defines intrinsics (which are handled above by checking + // contains_extern_indicator) and helper functions used by those intrinsics. The helper + // functions should always be inlined into intrinsics that use them. This check does not + // guarantee that we get the optimizations we want, but it makes them *much* easier. + // See https://github.com/rust-lang/rust/issues/73135 + if tcx.is_compiler_builtins(rustc_span::def_id::LOCAL_CRATE) { + return true; + } + if tcx.has_attr(def_id, sym::rustc_intrinsic) { // Intrinsic fallback bodies are always cross-crate inlineable. // To ensure that the MIR inliner doesn't cluelessly try to inline fallback diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 614a1f57d64a2..6facb0dd6ecee 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -319,6 +319,13 @@ fn merge_codegen_units<'tcx>( let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> = codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect(); + // When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU. 
+ // There may be mergeable CGUs under this constraint, but just skipping over merging is much + // simpler. + if cx.tcx.is_compiler_builtins(LOCAL_CRATE) { + return cgu_contents; + } + // If N is the maximum number of CGUs, and the CGUs are sorted from largest // to smallest, we repeatedly find which CGU in codegen_units[N..] has the // greatest overlap of inlined items with codegen_units[N-1], merge that @@ -680,6 +687,16 @@ fn compute_codegen_unit_name<'tcx>( mono_item: MonoItem<'tcx>, cache: &mut CguNameCache, ) -> Symbol { + // When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU. + // Using the symbol name as the CGU name puts every GloballyShared item in its own CGU, but in + // an optimized build we actually want every item in the crate that isn't an intrinsic to get + // LocalCopy so that it is easy to inline away. In an unoptimized build, this CGU naming + // strategy probably generates more CGUs than we strictly need. But it is simple. + if tcx.is_compiler_builtins(LOCAL_CRATE) { + let name = mono_item.symbol_name(tcx); + return Symbol::intern(name.name); + } + let Some(def_id) = characteristic_def_id_of_mono_item(tcx, mono_item) else { return fallback_cgu_name(name_builder); }; diff --git a/library/Cargo.toml b/library/Cargo.toml index e59aa518804f3..9fcddfc019bb2 100644 --- a/library/Cargo.toml +++ b/library/Cargo.toml @@ -11,19 +11,6 @@ exclude = [ "windows_targets" ] -[profile.release.package.compiler_builtins] -# For compiler-builtins we always use a high number of codegen units. -# The goal here is to place every single intrinsic into its own object -# file to avoid symbol clashes with the system libgcc if possible. Note -# that this number doesn't actually produce this many object files, we -# just don't create more than this number of object files. -# -# It's a bit of a bummer that we have to pass this here, unfortunately. 
-# Ideally this would be specified through an env var to Cargo so Cargo -# knows how many CGUs are for this specific crate, but for now -# per-crate configuration isn't specifiable in the environment. -codegen-units = 10000 - # These dependencies of the standard library implement symbolication for # backtraces on most platforms. Their debuginfo causes both linking to be slower # (more data to chew through) and binaries to be larger without really all that