From 472230d192b8bace6ddbe825d68ccd27d0d5ef1d Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 30 May 2023 00:34:50 -0700 Subject: [PATCH 1/2] Remove array_zip `[T; N]::zip` is "eager" but most zips are mapped. This causes poor optimization in generated code. This is a fundamental design issue and "zip" is "prime real estate" in terms of function names, so let's free it up again. --- library/core/src/array/mod.rs | 23 ----------------------- tests/codegen/array-map.rs | 11 ----------- tests/codegen/autovectorize-f32x4.rs | 11 ----------- 3 files changed, 45 deletions(-) diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index fec92320a4b5e..76b3589b9e4b3 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -538,29 +538,6 @@ impl<T, const N: usize> [T; N] { drain_array_with(self, |iter| try_from_trusted_iterator(iter.map(f))) } - /// 'Zips up' two arrays into a single array of pairs. - /// - /// `zip()` returns a new array where every element is a tuple where the - /// first element comes from the first array, and the second element comes - /// from the second array. In other words, it zips two arrays together, - /// into a single one. - /// - /// # Examples - /// - /// ``` - /// #![feature(array_zip)] - /// let x = [1, 2, 3]; - /// let y = [4, 5, 6]; - /// let z = x.zip(y); - /// assert_eq!(z, [(1, 4), (2, 5), (3, 6)]); - /// ``` - #[unstable(feature = "array_zip", issue = "80094")] - pub fn zip<U>(self, rhs: [U; N]) -> [(T, U); N] { - drain_array_with(self, |lhs| { - drain_array_with(rhs, |rhs| from_trusted_iterator(crate::iter::zip(lhs, rhs))) - }) - } - /// Returns a slice containing the entire array. Equivalent to `&s[..]`. 
#[stable(feature = "array_as_slice", since = "1.57.0")] #[rustc_const_stable(feature = "array_as_slice", since = "1.57.0")] diff --git a/tests/codegen/array-map.rs b/tests/codegen/array-map.rs index 3706ddf99fd90..24f3f43d07874 100644 --- a/tests/codegen/array-map.rs +++ b/tests/codegen/array-map.rs @@ -4,7 +4,6 @@ // ignore-debug (the extra assertions get in the way) #![crate_type = "lib"] -#![feature(array_zip)] // CHECK-LABEL: @short_integer_map #[no_mangle] @@ -16,16 +15,6 @@ pub fn short_integer_map(x: [u32; 8]) -> [u32; 8] { x.map(|x| 2 * x + 1) } -// CHECK-LABEL: @short_integer_zip_map -#[no_mangle] -pub fn short_integer_zip_map(x: [u32; 8], y: [u32; 8]) -> [u32; 8] { - // CHECK: %[[A:.+]] = load <8 x i32> - // CHECK: %[[B:.+]] = load <8 x i32> - // CHECK: sub <8 x i32> %[[B]], %[[A]] - // CHECK: store <8 x i32> - x.zip(y).map(|(x, y)| x - y) -} - // This test is checking that LLVM can SRoA away a bunch of the overhead, // like fully moving the iterators to registers. Notably, previous implementations // of `map` ended up `alloca`ing the whole `array::IntoIterator`, meaning both a diff --git a/tests/codegen/autovectorize-f32x4.rs b/tests/codegen/autovectorize-f32x4.rs index 9ecea53f1c05c..474ff1c4e91b9 100644 --- a/tests/codegen/autovectorize-f32x4.rs +++ b/tests/codegen/autovectorize-f32x4.rs @@ -1,7 +1,6 @@ // compile-flags: -C opt-level=3 -Z merge-functions=disabled // only-x86_64 #![crate_type = "lib"] -#![feature(array_zip)] // CHECK-LABEL: @auto_vectorize_direct #[no_mangle] @@ -31,13 +30,3 @@ pub fn auto_vectorize_loop(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { } c } - -// CHECK-LABEL: @auto_vectorize_array_zip_map -#[no_mangle] -pub fn auto_vectorize_array_zip_map(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { -// CHECK: load <4 x float> -// CHECK: load <4 x float> -// CHECK: fadd <4 x float> -// CHECK: store <4 x float> - a.zip(b).map(|(a, b)| a + b) -} From 374f5a8091f5dadff364cda8b19f8806e268e984 Mon Sep 17 00:00:00 2001 From: ScottMcMurray Date: Tue, 30 
May 2023 20:38:07 -0700 Subject: [PATCH 2/2] Test from_fn autovectorizes --- tests/codegen/autovectorize-f32x4.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/codegen/autovectorize-f32x4.rs b/tests/codegen/autovectorize-f32x4.rs index 474ff1c4e91b9..54392be707f53 100644 --- a/tests/codegen/autovectorize-f32x4.rs +++ b/tests/codegen/autovectorize-f32x4.rs @@ -30,3 +30,13 @@ pub fn auto_vectorize_loop(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { } c } + +// CHECK-LABEL: @auto_vectorize_array_from_fn +#[no_mangle] +pub fn auto_vectorize_array_from_fn(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { +// CHECK: load <4 x float> +// CHECK: load <4 x float> +// CHECK: fadd <4 x float> +// CHECK: store <4 x float> + std::array::from_fn(|i| a[i] + b[i]) +}