-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[-Wunsafe-buffer-usage] Add alloc_size knowledge to the 2-param span constructor warning #114894
[-Wunsafe-buffer-usage] Add alloc_size knowledge to the 2-param span constructor warning #114894
Conversation
@llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: Ziqing Luo (ziqingluo-90) ChangesWe can take advantage of the attribute
rdar://136634730 Full diff: https://github.com/llvm/llvm-project/pull/114894.diff 2 Files Affected:
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 2c68409b846bc8..66e622fe2d714e 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
+#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/FormatString.h"
@@ -358,6 +360,63 @@ isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
}
+// Returns true iff integer E1 is equivalent to integer E2.
+//
+// For now we only support such expressions:
+// expr := DRE | const-value | expr BO expr
+// BO := '*' | '+'
+static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx);
+static bool areEqualIntegralBinaryOperators(const BinaryOperator *E1,
+ const Expr *E2_LHS,
+ BinaryOperatorKind BOP,
+ const Expr *E2_RHS,
+ ASTContext &Ctx) {
+ if (E1->getOpcode() == BOP) {
+ switch (BOP) {
+ // Commutative operators:
+ case BO_Mul:
+ case BO_Add:
+ return (areEqualIntegers(E1->getLHS(), E2_LHS, Ctx) &&
+ areEqualIntegers(E1->getRHS(), E2_RHS, Ctx)) ||
+ (areEqualIntegers(E1->getLHS(), E2_RHS, Ctx) &&
+ areEqualIntegers(E1->getRHS(), E2_LHS, Ctx));
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
+ E1 = E1->IgnoreParenImpCasts();
+ E2 = E2->IgnoreParenImpCasts();
+ if (!E1->getType()->isIntegerType() || E1->getType() != E2->getType())
+ return false;
+
+ Expr::EvalResult ER1, ER2;
+
+ // If both are constants:
+ if (E1->EvaluateAsConstantExpr(ER1, Ctx) &&
+ E2->EvaluateAsConstantExpr(ER2, Ctx))
+ return ER1.Val.getInt() == ER2.Val.getInt();
+
+ // Otherwise, they should have identical stmt kind:
+ if (E1->getStmtClass() != E2->getStmtClass())
+ return false;
+ switch (E1->getStmtClass()) {
+ case Stmt::DeclRefExprClass:
+ return cast<DeclRefExpr>(E1)->getDecl() == cast<DeclRefExpr>(E2)->getDecl();
+ case Stmt::BinaryOperatorClass: {
+ auto BO2 = cast<BinaryOperator>(E2);
+ return areEqualIntegralBinaryOperators(cast<BinaryOperator>(E1),
+ BO2->getLHS(), BO2->getOpcode(),
+ BO2->getRHS(), Ctx);
+ }
+ default:
+ return false;
+ }
+}
+
// Given a two-param std::span construct call, matches iff the call has the
// following forms:
// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
@@ -366,14 +425,20 @@ isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
// `n`
// 5. `std::span<T>{any, 0}`
+// 6. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
+// `f` is a function with attribute `alloc_size(N, M)`;
+// `args` represents the list of arguments;
+// `N, M` are parameter indexes to the allocating element number and size.
AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
assert(Node.getNumArgs() == 2 &&
"expecting a two-parameter std::span constructor");
- const Expr *Arg0 = Node.getArg(0)->IgnoreImplicit();
- const Expr *Arg1 = Node.getArg(1)->IgnoreImplicit();
- auto HaveEqualConstantValues = [&Finder](const Expr *E0, const Expr *E1) {
- if (auto E0CV = E0->getIntegerConstantExpr(Finder->getASTContext()))
- if (auto E1CV = E1->getIntegerConstantExpr(Finder->getASTContext())) {
+ const Expr *Arg0 = Node.getArg(0)->IgnoreParenImpCasts();
+ const Expr *Arg1 = Node.getArg(1)->IgnoreParenImpCasts();
+ ASTContext &Ctx = Finder->getASTContext();
+
+ auto HaveEqualConstantValues = [&Ctx](const Expr *E0, const Expr *E1) {
+ if (auto E0CV = E0->getIntegerConstantExpr(Ctx))
+ if (auto E1CV = E1->getIntegerConstantExpr(Ctx)) {
return APSInt::compareValues(*E0CV, *E1CV) == 0;
}
return false;
@@ -386,12 +451,14 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
return false;
};
std::optional<APSInt> Arg1CV =
- Arg1->getIntegerConstantExpr(Finder->getASTContext());
+ Arg1->getIntegerConstantExpr(Ctx);
if (Arg1CV && Arg1CV->isZero())
// Check form 5:
return true;
- switch (Arg0->IgnoreImplicit()->getStmtClass()) {
+
+ // Check forms 1-3:
+ switch (Arg0->getStmtClass()) {
case Stmt::CXXNewExprClass:
if (auto Size = cast<CXXNewExpr>(Arg0)->getArraySize()) {
// Check form 1:
@@ -417,12 +484,41 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
if (auto *ConstArrTy =
- Finder->getASTContext().getAsConstantArrayType(Arg0Ty)) {
+ Ctx.getAsConstantArrayType(Arg0Ty)) {
const APSInt ConstArrSize = APSInt(ConstArrTy->getSize());
// Check form 4:
return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0;
}
+ // Check form 6:
+ if (auto CCast = dyn_cast<CStyleCastExpr>(Arg0)) {
+ if (!CCast->getType()->isPointerType())
+ return false;
+
+ QualType PteTy = CCast->getType()->getPointeeType();
+
+ if (!(PteTy->isConstantSizeType() && Ctx.getTypeSizeInChars(PteTy).isOne()))
+ return false;
+
+ if (auto Call = dyn_cast<CallExpr>(CCast->getSubExpr())) {
+ if (const FunctionDecl *FD = Call->getDirectCallee())
+ if (auto AllocAttr = FD->getAttr<AllocSizeAttr>()) {
+ const Expr *EleSizeExpr =
+ Call->getArg(AllocAttr->getElemSizeParam().getASTIndex());
+ // NumElemIdx is invalid if AllocSizeAttr has 1 argument:
+ ParamIdx NumElemIdx = AllocAttr->getNumElemsParam();
+
+ if (!NumElemIdx.isValid())
+ return areEqualIntegers(Arg1, EleSizeExpr, Ctx);
+
+ const Expr *NumElesExpr = Call->getArg(NumElemIdx.getASTIndex());
+
+ if (auto BO = dyn_cast<BinaryOperator>(Arg1))
+ return areEqualIntegralBinaryOperators(BO, NumElesExpr, BO_Mul,
+ EleSizeExpr, Ctx);
+ }
+ }
+ }
return false;
}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
index c138fe088b3ba9..a1c47c5afccc05 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp
@@ -137,6 +137,44 @@ namespace construct_wt_begin_end {
}
} // namespace construct_wt_begin_end
+namespace test_alloc_size_attr {
+ void * my_alloc(unsigned size) __attribute__((alloc_size(1)));
+ void * my_alloc2(unsigned count, unsigned size) __attribute__((alloc_size(1,2)));
+
+ template<typename T>
+ void foo(std::span<T> S);
+
+ void safe(int x, unsigned y) {
+ foo(std::span<char>{(char *)my_alloc(10), 10});
+ foo(std::span<char>{(char *)my_alloc(x), x});
+ foo(std::span<char>{(char *)my_alloc(x * y), x * y});
+ foo(std::span<char>{(char *)my_alloc(x * y), y * x});
+ foo(std::span<char>{(char *)my_alloc(x * y + x), x * y + x});
+ foo(std::span<char>{(char *)my_alloc(x * y + x), x + y * x});
+
+ foo(std::span<char>{(char *)my_alloc2(x, y), x * y});
+ foo(std::span<char>{(char *)my_alloc2(x, y), y * x});
+ //foo(std::span<char>{(char *)my_alloc2(x, sizeof(char)), x}); // lets not worry about this case for now
+ foo(std::span<char>{(char *)my_alloc2(x, sizeof(char)), x * sizeof(char)});
+ //foo(std::span<char>{(char *)my_alloc2(10, sizeof(char)), 10});
+ foo(std::span<char>{(char *)my_alloc2(10, sizeof(char)), 10 * sizeof(char)});
+ }
+
+ void unsafe(int x, int y) {
+ foo(std::span<char>{(char *)my_alloc(10), 11}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ foo(std::span<char>{(char *)my_alloc(x * y), x + y}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ foo(std::span<int>{(int *)my_alloc(x), x}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ foo(std::span<char>{(char *)my_alloc2(x, y), x + y}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ foo(std::span<int>{(int *)my_alloc2(x, y), x * y}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ }
+
+ void unsupport(int x, int y) {
+ // Casting to `T*` where sizeof(T) > 1 is not supported yet:
+ foo(std::span<long>{(long *)my_alloc(10 * sizeof(long)), 10}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ foo(std::span<long>{(long *)my_alloc2(x, sizeof(long)), x}); // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}}
+ }
+}
+
namespace test_flag {
void f(int *p) {
#pragma clang diagnostic push
|
You can test this locally with the following command:git-clang-format --diff 5cc033b5f2ac0f257ee6c7fd457da0425dc64d37 a33defd0d3e497259fecc553881110a52eb599a3 --extensions cpp -- clang/lib/Analysis/UnsafeBufferUsage.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp View the diff from clang-format here.diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 89b16c9c31..5321b15194 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -394,8 +394,7 @@ static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
Expr::EvalResult ER1, ER2;
// If both are constants:
- if (E1->EvaluateAsInt(ER1, Ctx) &&
- E2->EvaluateAsInt(ER2, Ctx))
+ if (E1->EvaluateAsInt(ER1, Ctx) && E2->EvaluateAsInt(ER2, Ctx))
return ER1.Val.getInt() == ER2.Val.getInt();
// Otherwise, they should have identical stmt kind:
|
3f4032e
to
bfcce44
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens in cases where the expression are equivalent but are composed differently. For instance, my_alloc( x + y + z), z + y + x}). Can you add this test case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. It cannot figure out this case. It's too complicated for the simple expression comparison function.
I add this test as unsupported.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that it is fine to not handle this for now.
If we do decide to handle this, we need to be careful because this equivalence does not hold for signed integer arithmetic expressions in C. The equivalence relies on commutativity and associativity of integer arithmetic. Reassociation can change whether overflow occurs for integer expressions at runtime and signed integer overflow is undefined behavior in C. If, however, we assume that signed integer arithmetic is 2's complement arithmetic (which can be specified via a compiler flag), then this equivalence holds for signed integer expressions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very interesting. Potentially an API that could be exposed to other analyses too!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please rebase this PR? The isSafeSpanTwoParamConstruct matcher has changes on the main branch that are not present here. For instance, case 6 is now used by :std::span<T>{std::addressof(...), 1}
on the main branch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yup!
…aram span ctor warning The -Wunsafe-buffer-usage analysis now recognize allocating functions that are explicitly annotated with `__attribute__((alloc_size))`. Now, for example, it can prove safety of the following span construction: ``` void * my_alloc(unsigned size) __attribute__((alloc_size(1))); std::span<char>{(char *)my_alloc(10), 10}; // <-- this is safe ``` (rdar://136634730)
bfcce44
to
a33defd
Compare
CC: @dtarditi |
LGTM! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/13202 Here is the relevant piece of the build log for the reference
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ziqingluo-90 @malavikasamak thanks for adding me to this review. I left one informational comment and one piece of feedback.
// expr := DRE | const-value | expr BO expr | ||
// BO := '*' | '+' | ||
// | ||
// FIXME: We can reuse the expression comparator of the interop analysis after |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ziqingluo-90 Is the expression comparator of the interop analysis the same or different from what is done here?
I believe this expression comparison can result in exponential work in the size of an expression.. AreEquaIntegralBinaryOperator makes 2 calls to AreEqualIntegers per subexpression. AreEqualIntegers can call back into EqualIntegeralBinaryOperator on subexpressions of its argument.
If the plan is to eventually replace this code with the expression comparator from interop analysis, then this is not worth fixing now. However, if the expression comparator has the same issue or that is not the plan, then this should be fixed. It can be fixed be bounding the amount of work that is allowed, for example, by limiting the recursive depth that is allowed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that it is fine to not handle this for now.
If we do decide to handle this, we need to be careful because this equivalence does not hold for signed integer arithmetic expressions in C. The equivalence relies on commutativity and associativity of integer arithmetic. Reassociation can change whether overflow occurs for integer expressions at runtime and signed integer overflow is undefined behavior in C. If, however, we assume that signed integer arithmetic is 2's complement arithmetic (which can be specified via a compiler flag), then this equivalence holds for signed integer expressions.
We can take advantage of the attribute
alloc_size
. For example,rdar://136634730