ROLLBACK: Limit slot_size to 2^16-1 and maximum table size to 2^43-1.
Limiting `slot_size` may subtly break workflows with seemingly irrelevant changes.

PiperOrigin-RevId: 731443006
Change-Id: I458610d280c99f773b0a957bb8f3d6d00529551a
goldvitaly authored and copybara-github committed Feb 26, 2025
1 parent fbc0df2 commit b22423e
Showing 3 changed files with 48 additions and 136 deletions.
54 changes: 18 additions & 36 deletions absl/container/internal/raw_hash_set.cc
@@ -69,16 +69,6 @@ static_assert(NumControlBytes(SooCapacity()) <= 17,

namespace {

[[noreturn]] ABSL_ATTRIBUTE_NOINLINE void HashTableSizeOverflow() {
ABSL_RAW_LOG(FATAL, "Hash table size overflow");
}

void ValidateMaxSize(size_t size, size_t slot_size) {
if (IsAboveMaxValidSize(size, slot_size)) {
HashTableSizeOverflow();
}
}

// Returns "random" seed.
inline size_t RandomSeed() {
#ifdef ABSL_HAVE_THREAD_LOCAL
@@ -525,6 +515,9 @@ void ResizeEmptyNonAllocatedTableImpl(CommonFields& common, size_t new_capacity,
assert(common.capacity() <= policy.soo_capacity);
assert(common.empty());
const size_t slot_size = policy.slot_size;
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(slot_size))) {
HashTableSizeOverflow();
}
HashtablezInfoHandle infoz;
const bool should_sample =
policy.is_hashtablez_eligible && (force_infoz || ShouldSampleNextTable());
@@ -930,25 +923,10 @@ void ResizeAllocatedTable(CommonFields& common, size_t new_capacity,
common, new_capacity, common.infoz(), policy);
}

void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
size_t new_size,
const PolicyFunctions& policy) {
ValidateMaxSize(new_size, policy.slot_size);
ResizeEmptyNonAllocatedTableImpl(
common, NormalizeCapacity(GrowthToLowerboundCapacity(new_size)),
/*force_infoz=*/false, policy);
// This is after resize, to ensure that we have completed the allocation
// and have potentially sampled the hashtable.
common.infoz().RecordReservation(new_size);
common.reset_reserved_growth(new_size);
common.set_reservation_size(new_size);
}

void ReserveEmptyNonAllocatedTableToFitBucketCount(
CommonFields& common, size_t bucket_count, const PolicyFunctions& policy) {
ValidateMaxSize(bucket_count, policy.slot_size);
ResizeEmptyNonAllocatedTableImpl(common, NormalizeCapacity(bucket_count),
/*force_infoz=*/false, policy);
void ResizeEmptyNonAllocatedTable(CommonFields& common, size_t new_capacity,
const PolicyFunctions& policy) {
ResizeEmptyNonAllocatedTableImpl(common, new_capacity, /*force_infoz=*/false,
policy);
}

void GrowEmptySooTableToNextCapacityForceSampling(
@@ -1015,11 +993,12 @@ void Rehash(CommonFields& common, size_t n, const PolicyFunctions& policy) {
NormalizeCapacity(n | GrowthToLowerboundCapacity(common.size()));
// n == 0 unconditionally rehashes as per the standard.
if (n == 0 || new_capacity > cap) {
ValidateMaxSize(n, policy.slot_size);
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(slot_size))) {
HashTableSizeOverflow();
}
if (cap == policy.soo_capacity) {
if (common.empty()) {
ResizeEmptyNonAllocatedTableImpl(common, new_capacity,
/*force_infoz=*/false, policy);
ResizeEmptyNonAllocatedTable(common, new_capacity, policy);
} else {
ResizeFullSooTable(common, new_capacity,
ResizeFullSooTableSamplingMode::kNoSampling, policy);
@@ -1035,9 +1014,6 @@

void ReserveAllocatedTable(CommonFields& common, size_t n,
const PolicyFunctions& policy) {
common.reset_reserved_growth(n);
common.set_reservation_size(n);

const size_t cap = common.capacity();
assert(!common.empty() || cap > policy.soo_capacity);
assert(cap > 0);
@@ -1047,8 +1023,10 @@ void ReserveAllocatedTable(CommonFields& common, size_t n,
if (n <= max_size_before_growth) {
return;
}
ValidateMaxSize(n, policy.slot_size);
const size_t new_capacity = NormalizeCapacity(GrowthToLowerboundCapacity(n));
if (ABSL_PREDICT_FALSE(new_capacity > MaxValidCapacity(policy.slot_size))) {
HashTableSizeOverflow();
}
if (cap == policy.soo_capacity) {
assert(!common.empty());
ResizeFullSooTable(common, new_capacity,
@@ -1088,6 +1066,10 @@ size_t PrepareInsertNonSoo(CommonFields& common, size_t hash,
return target.offset;
}

void HashTableSizeOverflow() {
ABSL_RAW_LOG(FATAL, "Hash table size overflow");
}

} // namespace container_internal
ABSL_NAMESPACE_END
} // namespace absl
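
For context (an editor's sketch, not part of this commit): a minimal C++20 translation unit illustrating the overflow check the rollback restores. NormalizeCapacity and MaxValidCapacity mirror the definitions visible in the raw_hash_set.h hunks below; HashTableSizeOverflow and CheckedResize are simplified stand-ins for ABSL_RAW_LOG(FATAL, ...) and the real resize/rehash/reserve paths.

#include <bit>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <limits>

// Valid capacities are of the form 2^k - 1, as in the header below.
constexpr size_t NormalizeCapacity(size_t n) {
  return n ? ~size_t{} >> std::countl_zero(n) : 1;
}

// Restored bound: keep the slot array below SIZE_MAX / 4 bytes, so
// capacity * slot_size arithmetic cannot overflow.
constexpr size_t MaxValidCapacity(size_t slot_size) {
  return NormalizeCapacity((std::numeric_limits<size_t>::max)() / 4 /
                           slot_size);
}

[[noreturn]] void HashTableSizeOverflow() {
  // The real code logs ABSL_RAW_LOG(FATAL, "Hash table size overflow").
  std::fputs("Hash table size overflow\n", stderr);
  std::abort();
}

// Stand-in for the checks added back to the resize paths above.
void CheckedResize(size_t new_capacity, size_t slot_size) {
  if (new_capacity > MaxValidCapacity(slot_size)) {  // ABSL_PREDICT_FALSE
    HashTableSizeOverflow();
  }
  // ... allocate the backing array and rehash as usual ...
}
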
82 changes: 30 additions & 52 deletions absl/container/internal/raw_hash_set.h
@@ -1097,6 +1097,14 @@ constexpr size_t NormalizeCapacity(size_t n) {
return n ? ~size_t{} >> countl_zero(n) : 1;
}

constexpr size_t MaxValidCapacity(size_t slot_size) {
return NormalizeCapacity((std::numeric_limits<size_t>::max)() / 4 /
slot_size);
}

// Use a non-inlined function to avoid code bloat.
[[noreturn]] void HashTableSizeOverflow();

// General notes on capacity/growth methods below:
// - We use 7/8th as maximum load factor. For 16-wide groups, that gives an
// average of two empty slots per group.
@@ -1529,7 +1537,7 @@ ABSL_ATTRIBUTE_NOINLINE void DeallocateBackingArray(
struct PolicyFunctions {
uint32_t key_size;
uint32_t value_size;
uint16_t slot_size;
uint32_t slot_size;
uint16_t slot_align;
uint8_t soo_capacity;
bool is_hashtablez_eligible;
@@ -1576,20 +1584,14 @@ constexpr size_t SooSlotIndex() { return 1; }
// Allowing till 16 would require additional store that can be avoided.
constexpr size_t MaxSmallAfterSooCapacity() { return 7; }

// Resizes empty non-allocated table to the capacity to fit new_size elements.
// Resizes empty non-allocated table to the new capacity.
// Requires:
// 1. `c.capacity() == policy.soo_capacity`.
// 2. `c.empty()`.
// 3. `new_size > policy.soo_capacity`.
// 3. `new_capacity > policy.soo_capacity`.
// The table will be attempted to be sampled.
void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
size_t new_size,
const PolicyFunctions& policy);

// The same as ReserveEmptyNonAllocatedTableToFitNewSize, but resizes to the
// next valid capacity after `bucket_count`.
void ReserveEmptyNonAllocatedTableToFitBucketCount(
CommonFields& common, size_t bucket_count, const PolicyFunctions& policy);
void ResizeEmptyNonAllocatedTable(CommonFields& common, size_t new_capacity,
const PolicyFunctions& policy);

// Resizes empty non-allocated SOO table to NextCapacity(SooCapacity()) and
// forces the table to be sampled.
@@ -1657,33 +1659,6 @@ InitializeThreeElementsControlBytesAfterSoo(size_t hash, ctrl_t* new_ctrl) {
// new_ctrl after 2nd store = EHESEHEEEEE
}

// Template parameter is only used to enable testing.
template <size_t kSizeOfSizeT = sizeof(size_t)>
constexpr size_t MaxValidSize(size_t slot_size) {
if constexpr (kSizeOfSizeT == 4) {
return (size_t{1} << (kSizeOfSizeT * 8 - 2)) / slot_size - 1;
} else {
static_assert(kSizeOfSizeT == 8);
constexpr size_t kSizeBits = 43;
static_assert(
kSizeBits + sizeof(PolicyFunctions::slot_size) * 8 < 64,
"we expect that slot size is small enough that allocation size "
"will not overflow");
return CapacityToGrowth(static_cast<size_t>(uint64_t{1} << kSizeBits) - 1);
}
}

// Template parameter is only used to enable testing.
template <size_t kSizeOfSizeT = sizeof(size_t)>
constexpr size_t IsAboveMaxValidSize(size_t size, size_t slot_size) {
if constexpr (kSizeOfSizeT == 4) {
return uint64_t{size} * slot_size >
MaxValidSize<kSizeOfSizeT>(/*slot_size=*/1);
} else {
return size > MaxValidSize(slot_size);
}
}

// Returns the optimal size for memcpy when transferring SOO slot.
// Otherwise, returns the optimal size for memcpy SOO slot transfer
// to SooSlotIndex().
@@ -2150,8 +2125,8 @@ class raw_hash_set {
: settings_(CommonFields::CreateDefault<SooEnabled()>(), hash, eq,
alloc) {
if (bucket_count > DefaultCapacity()) {
ReserveEmptyNonAllocatedTableToFitBucketCount(common(), bucket_count,
GetPolicyFunctions());
ResizeEmptyNonAllocatedTable(common(), NormalizeCapacity(bucket_count),
GetPolicyFunctions());
}
}

@@ -2427,7 +2402,9 @@
ABSL_ASSUME(cap >= kDefaultCapacity);
return cap;
}
size_t max_size() const { return MaxValidSize(sizeof(slot_type)); }
size_t max_size() const {
return CapacityToGrowth(MaxValidCapacity(sizeof(slot_type)));
}

ABSL_ATTRIBUTE_REINITIALIZES void clear() {
if (SwisstableGenerationsEnabled() &&
@@ -2836,10 +2813,16 @@
ReserveAllocatedTable(common(), n, GetPolicyFunctions());
} else {
if (ABSL_PREDICT_TRUE(n > DefaultCapacity())) {
ReserveEmptyNonAllocatedTableToFitNewSize(common(), n,
GetPolicyFunctions());
ResizeEmptyNonAllocatedTable(
common(), NormalizeCapacity(GrowthToLowerboundCapacity(n)),
GetPolicyFunctions());
// This is after resize, to ensure that we have completed the allocation
// and have potentially sampled the hashtable.
infoz().RecordReservation(n);
}
}
common().reset_reserved_growth(n);
common().set_reservation_size(n);
}

// Extension API: support for heterogeneous keys.
@@ -3575,15 +3558,10 @@
}

static const PolicyFunctions& GetPolicyFunctions() {
static_assert(sizeof(slot_type) <= (std::numeric_limits<uint16_t>::max)(),
"Slot size is too large. Use std::unique_ptr for value type "
"or use absl::node_hash_{map,set}.");
static_assert(alignof(slot_type) <=
size_t{(std::numeric_limits<uint16_t>::max)()});
static_assert(sizeof(key_type) <=
size_t{(std::numeric_limits<uint32_t>::max)()});
static_assert(sizeof(value_type) <=
size_t{(std::numeric_limits<uint32_t>::max)()});
static_assert(sizeof(slot_type) <= (std::numeric_limits<uint32_t>::max)());
static_assert(alignof(slot_type) <= (std::numeric_limits<uint16_t>::max)());
static_assert(sizeof(key_type) <= (std::numeric_limits<uint32_t>::max)());
static_assert(sizeof(value_type) <= (std::numeric_limits<uint32_t>::max)());
static constexpr size_t kBackingArrayAlignment =
BackingArrayAlignment(alignof(slot_type));
static constexpr PolicyFunctions value = {
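
A short follow-on, continuing the sketch after the raw_hash_set.cc section above, showing how this change's max_size() is derived. CapacityToGrowth here is an assumed simplification applying the 7/8 maximum load factor described in the header's growth notes; the real function also special-cases very small tables.

// Reuses NormalizeCapacity/MaxValidCapacity from the sketch above.
// Assumed simplification: growth limit is 7/8 of capacity.
constexpr size_t CapacityToGrowth(size_t capacity) {
  return capacity - capacity / 8;
}

int main() {
  // For an 8-byte slot with a 64-bit size_t, MaxValidCapacity(8) is
  // 2^59 - 1, and max_size() is 7/8 of that.
  constexpr size_t kCap = MaxValidCapacity(8);
  constexpr size_t kMaxSize = CapacityToGrowth(kCap);
  std::printf("max capacity: %zu\nmax_size():   %zu\n", kCap, kMaxSize);
}
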
48 changes: 0 additions & 48 deletions absl/container/internal/raw_hash_set_test.cc
@@ -4039,60 +4039,12 @@ TEST(Table, MovedFromCallsFail) {
}
}

TEST(Table, MaxValidSize) {
IntTable t;
EXPECT_EQ(MaxValidSize(sizeof(IntTable::value_type)), t.max_size());
if constexpr (sizeof(size_t) == 8) {
for (size_t i = 0; i < 16; ++i) {
size_t slot_size = size_t{1} << i;
size_t max_size = MaxValidSize(slot_size);
ASSERT_LT(max_size, uint64_t{1} << 60);
ASSERT_TRUE(IsAboveMaxValidSize(max_size + 1, slot_size));
ASSERT_TRUE(IsAboveMaxValidSize(uint64_t{1} << 63, slot_size));
ASSERT_TRUE(IsAboveMaxValidSize(~size_t{}, slot_size));
ASSERT_TRUE(IsAboveMaxValidSize(~size_t{} / 8 * 7, slot_size));
// Given that key size have to be at least 6 bytes to reach so many
// different values, total memory usage of the table will be at least
// 2^42*7 bytes (28 TB).
// So that value should be enough for all practical purposes.
ASSERT_GE(max_size, uint64_t{1} << 42);
// We leave some headroom for the table metadata.
ASSERT_LT(NormalizeCapacity(GrowthToLowerboundCapacity(max_size)),
uint64_t{1} << 44);
}
}
EXPECT_LT(MaxValidSize</*kSizeOfSizeT=*/4>(1), 1 << 30);
EXPECT_LT(MaxValidSize</*kSizeOfSizeT=*/4>(2), 1 << 29);
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(1 << 30, 1));
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(1 << 29, 2));
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(~uint32_t{}, 1));
EXPECT_TRUE(IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(~uint32_t{} / 8 * 7, 1));
for (size_t i = 0; i < 16; ++i) {
size_t slot_size = size_t{1} << i;
size_t max_size = MaxValidSize</*kSizeOfSizeT=*/4>(slot_size);
ASSERT_LT(max_size, 1 << 30);
ASSERT_TRUE(
IsAboveMaxValidSize</*kSizeOfSizeT=*/4>(max_size + 1, slot_size));
size_t max_capacity =
NormalizeCapacity(GrowthToLowerboundCapacity(max_size));
ASSERT_LT(max_capacity, (size_t{1} << 31) / slot_size);
ASSERT_GT(max_capacity, (1 << 29) / slot_size);
}
}

TEST(Table, MaxSizeOverflow) {
size_t overflow = (std::numeric_limits<size_t>::max)();
EXPECT_DEATH_IF_SUPPORTED(IntTable t(overflow), "Hash table size overflow");
IntTable t;
EXPECT_DEATH_IF_SUPPORTED(t.reserve(overflow), "Hash table size overflow");
EXPECT_DEATH_IF_SUPPORTED(t.rehash(overflow), "Hash table size overflow");
size_t slightly_overflow = MaxValidSize(sizeof(IntTable::value_type)) + 1;
EXPECT_DEATH_IF_SUPPORTED(IntTable t2(slightly_overflow),
"Hash table size overflow");
EXPECT_DEATH_IF_SUPPORTED(t.reserve(slightly_overflow),
"Hash table size overflow");
EXPECT_DEATH_IF_SUPPORTED(t.rehash(slightly_overflow),
"Hash table size overflow");
}

// TODO(b/397453582): Remove support for const hasher and remove this test.
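
Finally, a hypothetical sketch (no such test exists in this commit) of how the deleted slightly_overflow coverage could be re-expressed against the restored names. IntTable, CapacityToGrowth, MaxValidCapacity, and the death-test pattern all appear elsewhere in this diff; the test name is invented.

TEST(Table, SlightlyOverflowSketch) {
  // One past the largest size the table accepts; reserve/rehash should hit
  // the restored "Hash table size overflow" fatal path.
  const size_t slightly_overflow =
      CapacityToGrowth(MaxValidCapacity(sizeof(IntTable::value_type))) + 1;
  IntTable t;
  EXPECT_DEATH_IF_SUPPORTED(t.reserve(slightly_overflow),
                            "Hash table size overflow");
  EXPECT_DEATH_IF_SUPPORTED(t.rehash(slightly_overflow),
                            "Hash table size overflow");
}
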
