[msan] Apply handleVectorReduceIntrinsic to max/min vector instructions #129819
Conversation
Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin

from the default strict handling (visitInstruction) to handleVectorReduceIntrinsic.

Updates the tests from llvm#129741, llvm#129810, llvm#129768
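With this change, instead of eagerly checking the entire operand shadow, the shadow of the reduction result is the OR of the operand's shadow lanes. A minimal sketch of the same-type case (hypothetical function name; the instrumentation is shown schematically, following the pattern the updated tests check for):

define i32 @reduce_umax(<4 x i32> %v) sanitize_memory {
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
  ret i32 %r
}

; Instrumented roughly as (AllowShadowCast=false: the i32 result shadow
; already matches the element type, so no cast is inserted):
;   %s  = load <4 x i32>, ptr @__msan_param_tls                ; shadow of %v
;   %sr = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %s)
;   store i32 %sr, ptr @__msan_retval_tls                      ; shadow of %r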
@llvm/pr-subscribers-llvm-transforms

Author: Thurston Dang (thurstond)

Changes

Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin

from the default strict handling (visitInstruction) to handleVectorReduceIntrinsic.

Also adds a parameter to handleVectorReduceIntrinsic to specify whether the return type must match the elements of the vector.

Updates the tests from #129741, #129810, #129768

Patch is 84.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129819.diff

6 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 52e42932fc751..fdf20218635f0 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3653,14 +3653,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Instrument generic vector reduction intrinsics
// by ORing together all their fields.
//
- // The return type does not need to be the same type as the fields
+ // If AllowShadowCast is true, the return type does not need to be the same
+ // type as the fields
// e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
- void handleVectorReduceIntrinsic(IntrinsicInst &I) {
+ void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
assert(I.arg_size() == 1);
IRBuilder<> IRB(&I);
Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
- S = CreateShadowCast(IRB, S, getShadowTy(&I));
+ if (AllowShadowCast)
+ S = CreateShadowCast(IRB, S, getShadowTy(&I));
+ else
+ assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -3669,13 +3673,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
// %a1)
// shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+ //
+ // The type of the return value, initial starting value, and elements of the
+ // vector must be identical.
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
assert(I.arg_size() == 2);
IRBuilder<> IRB(&I);
Value *Shadow0 = getShadow(&I, 0);
Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+ assert(Shadow0->getType() == Shadow1->getType());
Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -4458,21 +4467,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_mul:
- // Add reduction to scalar
- case Intrinsic::aarch64_neon_faddv:
- case Intrinsic::aarch64_neon_saddv:
- case Intrinsic::aarch64_neon_uaddv:
- // Floating-point min/max (vector)
- // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
- // but our shadow propagation is the same.
- case Intrinsic::aarch64_neon_fmaxv:
- case Intrinsic::aarch64_neon_fminv:
- case Intrinsic::aarch64_neon_fmaxnmv:
- case Intrinsic::aarch64_neon_fminnmv:
- // Sum long across vector
- case Intrinsic::aarch64_neon_saddlv:
- case Intrinsic::aarch64_neon_uaddlv:
- handleVectorReduceIntrinsic(I);
+ // Signed/Unsigned Min/Max
+ // TODO: handling similarly to AND/OR may be more precise.
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ // TODO: this has no false positives, but arguably we should check that all
+ // the bits are initialized.
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
break;
case Intrinsic::vector_reduce_fadd:
@@ -4903,6 +4908,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Add reduction to scalar
+ case Intrinsic::aarch64_neon_faddv:
+ case Intrinsic::aarch64_neon_saddv:
+ case Intrinsic::aarch64_neon_uaddv:
+ // Signed/Unsigned min/max (Vector)
+ // TODO: handling similarly to AND/OR may be more precise.
+ case Intrinsic::aarch64_neon_smaxv:
+ case Intrinsic::aarch64_neon_sminv:
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_uminv:
+ // Floating-point min/max (vector)
+ // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
+ // but our shadow propagation is the same.
+ case Intrinsic::aarch64_neon_fmaxv:
+ case Intrinsic::aarch64_neon_fminv:
+ case Intrinsic::aarch64_neon_fmaxnmv:
+ case Intrinsic::aarch64_neon_fminnmv:
+ // Sum long across vector
+ case Intrinsic::aarch64_neon_saddlv:
+ case Intrinsic::aarch64_neon_uaddlv:
+ handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
+ break;
+
// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
index cbbd55d3e3497..632268e08022c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
@@ -2,9 +2,6 @@
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll
-;
-; Handled suboptimally (visitInstruction):
-; - llvm.aarch64.neon.smaxv
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
@@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP7]]
@@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP7]]
@@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP6]]
@@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP7]]
@@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP7]]
@@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll b/llvm/test/Instrumentation/...
[truncated]
@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/13448

Here is the relevant piece of the build log for reference:
Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin

from the default strict handling (visitInstruction) to handleVectorReduceIntrinsic.

Also adds a parameter to handleVectorReduceIntrinsic to specify whether the return type must match the elements of the vector.

Updates the tests from #129741, #129810, #129768
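When the scalar return type is wider than the vector elements, as with the AArch64 NEON across-lane reductions, AllowShadowCast=true allows the OR-reduced shadow to be widened to the return type. A schematic of the i32-returning smaxv case, following the CHECK lines in the updated tests above (%shadow_of_a1 is a placeholder name for the operand's shadow):

; llvm.aarch64.neon.smaxv.i32.v8i8 reduces <8 x i8> but returns i32,
; so the i8 OR-reduction of the shadow is zero-extended to i32:
;   %s1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %shadow_of_a1)
;   %s2 = zext i8 %s1 to i32    ; shadow of the i32 result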