Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation #109208

Merged
merged 15 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@ C++ Specific Potentially Breaking Changes
template <typename T>
void f();

- During constant evaluation, comparisons between different evaluations of the
same string literal are now correctly treated as non-constant, and comparisons
between string literals that cannot possibly overlap in memory are now treated
as constant. This updates Clang to match the anticipated direction of open core
issue `CWG2765 <http://wg21.link/CWG2765>`_, but is subject to change once that
issue is resolved.

.. code-block:: c++

constexpr const char *f() { return "hello"; }
constexpr const char *g() { return "world"; }
// Used to evaluate to false, now error: non-constant comparison.
constexpr bool a = f() == f();
// Might evaluate to true or false, as before.
bool at_runtime() { return f() == f(); }
// Was error, now evaluates to false.
constexpr bool b = f() == g();

ABI Changes in This Version
---------------------------

Expand Down
12 changes: 12 additions & 0 deletions clang/include/clang/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// This is lazily created. This is intentionally not serialized.
mutable llvm::StringMap<StringLiteral *> StringLiteralCache;

/// The next string literal "version" to allocate during constant evaluation.
/// This is used to distinguish between repeated evaluations of the same
/// string literal.
///
/// We don't need to serialize this because constants get re-evaluated in the
/// current file before they are compared locally.
unsigned NextStringLiteralVersion = 0;

/// MD5 hash of CUID. It is calculated when first used and cached by this
/// data member.
mutable std::string CUIDHash;
Expand Down Expand Up @@ -3278,6 +3286,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// PredefinedExpr to cache evaluated results.
StringLiteral *getPredefinedStringLiteralFromCache(StringRef Key) const;

/// Return the next version number to be used for a string literal evaluated
/// as part of constant evaluation.
unsigned getNextStringLiteralVersion() { return NextStringLiteralVersion++; }

/// Return a declaration for the global GUID object representing the given
/// GUID value.
MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticASTKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ def note_constexpr_pointer_constant_comparison : Note<
"at runtime">;
def note_constexpr_literal_comparison : Note<
"comparison of addresses of literals has unspecified value">;
def note_constexpr_opaque_call_comparison : Note<
"comparison against opaque constant address '%0' can only be performed at "
"runtime">;
def note_constexpr_pointer_weak_comparison : Note<
"comparison against address of weak declaration '%0' can only be performed "
"at runtime">;
Expand Down
130 changes: 115 additions & 15 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/SipHash.h"
Expand Down Expand Up @@ -2061,15 +2063,21 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
return true;
}

/// Should this call expression be treated as a no-op?
static bool IsNoOpCall(const CallExpr *E) {
/// Should this call expression be treated as forming an opaque constant?
static bool IsOpaqueConstantCall(const CallExpr *E) {
unsigned Builtin = E->getBuiltinCallee();
return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
Builtin == Builtin::BI__builtin_ptrauth_sign_constant ||
Comment on lines 2069 to 2071
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we get test coverage for these cases?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

Builtin == Builtin::BI__builtin_function_start);
}

static bool IsOpaqueConstantCall(const LValue &LVal) {
const auto *BaseExpr =
llvm::dyn_cast_if_present<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
return BaseExpr && IsOpaqueConstantCall(BaseExpr);
}

static bool IsGlobalLValue(APValue::LValueBase B) {
// C++11 [expr.const]p3 An address constant expression is a prvalue core
// constant expression of pointer type that evaluates to...
Expand Down Expand Up @@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
case Expr::ObjCBoxedExprClass:
return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer();
case Expr::CallExprClass:
return IsNoOpCall(cast<CallExpr>(E));
return IsOpaqueConstantCall(cast<CallExpr>(E));
// For GCC compatibility, &&label has static storage duration.
case Expr::AddrLabelExprClass:
return true;
Expand All @@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
return LVal.Base.dyn_cast<const ValueDecl*>();
}

static bool IsLiteralLValue(const LValue &Value) {
if (Value.getLValueCallIndex())
// Information about an LValueBase that is some kind of string.
//
// Populated by GetLValueBaseAsString. Note that Bytes may be a view into
// ObjCEncodeStorage of the same object, so a copy of this struct would leave
// the copy's Bytes referring to the original's storage.
struct LValueBaseString {
// Owning storage for the encoding string computed for an ObjCEncodeExpr
// base. Not written when the base is a string literal.
std::string ObjCEncodeStorage;
// The string contents as raw bytes: either a view of ObjCEncodeStorage or of
// the string literal's own byte data.
StringRef Bytes;
// Width of one character of the string in bytes (1 for ObjCEncodeExpr,
// otherwise the literal's character byte width).
int CharWidth;
};

// Gets the lvalue base of LVal as a string.
//
// On success, fills in AsString.Bytes and AsString.CharWidth (and, for an
// ObjCEncodeExpr base, AsString.ObjCEncodeStorage) and returns true. Returns
// false if the base is not an expression, or is an expression that is neither
// an ObjCEncodeExpr, a StringLiteral, nor a PredefinedExpr with a function
// name literal.
static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
LValueBaseString &AsString) {
const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>();
if (!BaseExpr)
return false;

// For ObjCEncodeExpr, we need to compute and store the string.
if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) {
Info.Ctx.getObjCEncodingForType(EE->getEncodedType(),
AsString.ObjCEncodeStorage);
// Bytes views the storage we just filled in, so it is only valid for as
// long as AsString itself is alive and unmodified.
AsString.Bytes = AsString.ObjCEncodeStorage;
AsString.CharWidth = 1;
return true;
}

// Otherwise, we have a StringLiteral.
const auto *Lit = dyn_cast<StringLiteral>(BaseExpr);
// A PredefinedExpr carries its text as a string literal of its own; use
// that literal in place of the base expression.
if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr))
Lit = PE->getFunctionName();

if (!Lit)
return false;
const Expr *E = Value.Base.dyn_cast<const Expr*>();
return E && !isa<MaterializeTemporaryExpr>(E);

AsString.Bytes = Lit->getBytes();
AsString.CharWidth = Lit->getCharByteWidth();
return true;
}

// Determine whether two string literals potentially overlap. This will be the
// case if they agree on the values of all the bytes on the overlapping region
// between them.
//
// The overlapping region is the portion of the two string literals that must
// overlap in memory if the pointers actually point to the same address at
// runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then
// the overlapping region is "cdef\0", which in this case does agree, so the
// strings are potentially overlapping. Conversely, for "foobar" + 3 versus
// "bazbar" + 3, the overlapping region contains all of both strings, so they
// are not potentially overlapping, even though they agree from the given
// addresses onwards.
//
// Returns false if either lvalue base is not a string (see
// GetLValueBaseAsString), or if the pointers are so far apart relative to
// their bases that the string data of the two literals cannot share any
// bytes.
//
// See open core issue CWG2765 which is discussing the desired rule here.
static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
                                                    const LValue &LHS,
                                                    const LValue &RHS) {
  LValueBaseString LHSString, RHSString;
  if (!GetLValueBaseAsString(Info, LHS, LHSString) ||
      !GetLValueBaseAsString(Info, RHS, RHSString))
    return false;

  // This is the byte offset to the location of the first character of LHS
  // within RHS. We don't need to look at the characters of one string that
  // would appear before the start of the other string if they were merged.
  //
  // The string data excludes the null terminator, so a pointer at or past the
  // terminator can produce an offset larger than the number of bytes we have.
  // StringRef::drop_front asserts in that case, so check the bound first; if
  // one string would start beyond the end of the other's data, there is no
  // overlapping region to compare.
  CharUnits Offset = RHS.Offset - LHS.Offset;
  if (Offset.isNegative()) {
    const auto Skip = static_cast<size_t>(-Offset.getQuantity());
    if (LHSString.Bytes.size() < Skip)
      return false;
    LHSString.Bytes = LHSString.Bytes.drop_front(Skip);
  } else {
    const auto Skip = static_cast<size_t>(Offset.getQuantity());
    if (RHSString.Bytes.size() < Skip)
      return false;
    RHSString.Bytes = RHSString.Bytes.drop_front(Skip);
  }

  bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
  StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
  StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes;
  int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth;

  // The null terminator isn't included in the string data, so check for it
  // manually. If the longer string doesn't have a null terminator where the
  // shorter string ends, they aren't potentially overlapping.
  for (int NullByte : llvm::seq(ShorterCharWidth)) {
    // Past the end of the longer string's data we would be looking at its
    // own (implicit) null terminator, which trivially matches.
    if (Shorter.size() + NullByte >= Longer.size())
      break;
    if (Longer[Shorter.size() + NullByte])
      return false;
  }

  // Otherwise, they're potentially overlapping if and only if the overlapping
  // region is the same.
  return Shorter == Longer.take_front(Shorter.size());
}

static bool IsWeakLValue(const LValue &Value) {
Expand Down Expand Up @@ -8573,7 +8661,10 @@ class LValueExprEvaluator
bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
bool VisitMemberExpr(const MemberExpr *E);
bool VisitStringLiteral(const StringLiteral *E) { return Success(E); }
bool VisitStringLiteral(const StringLiteral *E) {
return Success(APValue::LValueBase(
E, 0, Info.getASTContext().getNextStringLiteralVersion()));
}
bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); }
bool VisitCXXTypeidExpr(const CXXTypeidExpr *E);
bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
Expand Down Expand Up @@ -9639,7 +9730,7 @@ static bool isOneByteCharacterType(QualType T) {

bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
if (IsNoOpCall(E))
if (IsOpaqueConstantCall(E))
return Success(E);

switch (BuiltinOp) {
Expand Down Expand Up @@ -13889,13 +13980,22 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
(!RHSValue.Base && !RHSValue.Offset.isZero()))
return DiagComparison(diag::note_constexpr_pointer_constant_comparison,
!RHSValue.Base);
// It's implementation-defined whether distinct literals will have
// distinct addresses. In clang, the result of such a comparison is
// unspecified, so it is not a constant expression. However, we do know
// that the address of a literal will be non-null.
if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) &&
LHSValue.Base && RHSValue.Base)
// C++2c [intro.object]/10:
// Two objects [...] may have the same address if [...] they are both
// potentially non-unique objects.
// C++2c [intro.object]/9:
// An object is potentially non-unique if it is a string literal object,
// the backing array of an initializer list, or a subobject thereof.
//
// This makes the comparison result unspecified, so it's not a constant
// expression.
//
// TODO: Do we need to handle the initializer list case here?
if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue))
return DiagComparison(diag::note_constexpr_literal_comparison);
if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue))
return DiagComparison(diag::note_constexpr_opaque_call_comparison,
!IsOpaqueConstantCall(LHSValue));
// We can't tell whether weak symbols will end up pointing to the same
// object.
if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))
Expand Down
3 changes: 2 additions & 1 deletion clang/test/AST/ByteCode/builtin-functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,8 @@ namespace shufflevector {
namespace FunctionStart {
void a(void) {}
static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
// both-note {{comparison of addresses of literals has unspecified value}}
// ref-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} \
// expected-note {{comparison of addresses of literals has unspecified value}}
}

namespace BuiltinInImplicitCtor {
Expand Down
20 changes: 7 additions & 13 deletions clang/test/AST/ByteCode/cxx20.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ constexpr int f() {
static_assert(f());
#endif

/// Distinct literals have disctinct addresses.
/// Distinct literals have distinct addresses.
/// see https://github.com/llvm/llvm-project/issues/58754
constexpr auto foo(const char *p) { return p; }
constexpr auto p1 = "test1";
Expand All @@ -108,22 +108,16 @@ constexpr auto p2 = "test2";
constexpr bool b1 = foo(p1) == foo(p1);
static_assert(b1);

constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \
// ref-note {{comparison of addresses of literals}} \
// ref-note {{declared here}}
static_assert(!b2); // ref-error {{not an integral constant expression}} \
// ref-note {{not a constant expression}}
constexpr bool b2 = foo(p1) == foo(p2);
static_assert(!b2);

constexpr auto name1() { return "name1"; }
constexpr auto name2() { return "name2"; }

constexpr auto b3 = name1() == name1();
static_assert(b3);
constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \
// ref-note {{has unspecified value}} \
// ref-note {{declared here}}
static_assert(!b4); // ref-error {{not an integral constant expression}} \
// ref-note {{not a constant expression}}
constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \
// ref-note {{comparison of addresses of literals}}
constexpr auto b4 = name1() == name2();
static_assert(!b4);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just from looking at these few lines, I don't understand why b3 warns but b4 doesn't. They both compare the address of literals.

The bytecode interpreter simply creates global variables for string literals, so b3 here is simply true, like it was in the current interpreter before. Is this behavior wrong now?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The C++ model in general is that each evaluation of a string literal expression can produce a distinct object, and that these objects can fully or partially overlap each other in memory when they have suitable values; that's the model that this PR is implementing.

b3 is non-constant because each call to name1() can produce a distinct value, so the comparison result is unspecified. b4 is constant and false under this PR because the values returned by name1() and name2() cannot possibly be the same -- those two string literal evaluations can't produce the same value because the strings have different contents.


namespace UninitializedFields {
class A {
Expand Down
60 changes: 60 additions & 0 deletions clang/test/Modules/string-literal-uniqueness.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: split-file %s %t

// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cpp \
// RUN: -o %t/A.pcm

// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/b.cpp \
// RUN: -fmodule-file=A=%t/A.pcm -o %t/B.pcm

// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cpp \
// RUN: -fmodule-file=A=%t/A.pcm -o %t/C.pcm

// RUN: %clang_cc1 -std=c++20 -verify %t/main.cpp \
// RUN: -fmodule-file=A=%t/A.pcm \
// RUN: -fmodule-file=B=%t/B.pcm \
// RUN: -fmodule-file=C=%t/C.pcm

// expected-no-diagnostics

//--- a.cpp

export module A;
export consteval const char *hello() { return "hello"; }
export constexpr const char *helloA0 = hello();
export constexpr const char *helloA1 = helloA0;
export constexpr const char *helloA2 = hello();

//--- b.cpp

export module B;
import A;
export constexpr const char *helloB1 = helloA0;
export constexpr const char *helloB2 = hello();

//--- c.cpp

export module C;
import A;
export constexpr const char *helloC1 = helloA1;
export constexpr const char *helloC2 = hello();

//--- main.cpp

import A;
import B;
import C;

// These are valid: they refer to the same evaluation of the same constant.
static_assert(helloA0 == helloA1);
static_assert(helloA0 == helloB1);
static_assert(helloA0 == helloC1);

// These refer to distinct evaluations, and so may or may not be equal.
static_assert(helloA1 == helloA2); // expected-error {{}} expected-note {{unspecified value}}
static_assert(helloA1 == helloB2); // expected-error {{}} expected-note {{unspecified value}}
static_assert(helloA1 == helloC2); // expected-error {{}} expected-note {{unspecified value}}
static_assert(helloA2 == helloB2); // expected-error {{}} expected-note {{unspecified value}}
static_assert(helloA2 == helloC2); // expected-error {{}} expected-note {{unspecified value}}
static_assert(helloB2 == helloC2); // expected-error {{}} expected-note {{unspecified value}}
14 changes: 11 additions & 3 deletions clang/test/SemaCXX/builtins.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions -fptrauth-intrinsics
// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions -fptrauth-intrinsics
typedef const struct __CFString * CFStringRef;
#define CFSTR __builtin___CFStringMakeConstantString
#define NSSTR __builtin___NSStringMakeConstantString

void f() {
#if !defined(__MVS__) && !defined(_AIX)
// Builtin function __builtin___CFStringMakeConstantString is currently
// unsupported on z/OS and AIX.
(void)CFStringRef(CFSTR("Hello"));

constexpr bool a = CFSTR("Hello") == CFSTR("Hello");
// expected-error@-1 {{constant expression}}
// expected-note@-2 {{comparison against opaque constant address '&__builtin___CFStringMakeConstantString("Hello")'}}
constexpr bool b = NSSTR("Hello") == NSSTR("Hello");
// expected-error@-1 {{constant expression}}
// expected-note@-2 {{comparison against opaque constant address '&__builtin___NSStringMakeConstantString("Hello")'}}
#endif
}

Expand Down Expand Up @@ -47,7 +55,7 @@ void a(void) {}
int n;
void *p = __builtin_function_start(n); // expected-error {{argument must be a function}}
static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
// expected-note@-1 {{comparison of addresses of literals has unspecified value}}
// expected-note@-1 {{comparison against opaque constant address '&__builtin_function_start(a)'}}
} // namespace function_start

void no_ms_builtins() {
Expand Down
Loading