Skip to content

Commit

Permalink
Adapt bitset word size (#1554)
Browse files Browse the repository at this point in the history
  • Loading branch information
esseivaju authored Dec 28, 2024
1 parent a95a668 commit 6ec7f84
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 17 deletions.
47 changes: 35 additions & 12 deletions src/corecel/cont/Bitset.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#pragma once

#include <climits>
#include <cstdint>
#include <type_traits>

#include "corecel/Config.hh"

Expand All @@ -27,7 +29,7 @@ namespace celeritas
* for our current use case. Given that GPUs typically use 32-bit words, this
* uses unsigned int as the word type instead of the unsigned long used by the
* standard library. This container is not thread-safe, multiple threads are
* likely to manipulate the same word, even we accessing different indices.
* likely to manipulate the same word, even when accessing different indices.
*
* The following methods are not implemented:
* - conversions to string, to_ulong, to_ullong
Expand All @@ -44,7 +46,10 @@ class Bitset
public:
//!@{
//! \name Type aliases
using word_type = unsigned int;
using word_type = std::conditional_t<
(N <= 8),
std::uint8_t,
std::conditional_t<(N <= 16), std::uint16_t, size_type>>;
//!@}

class reference;
Expand All @@ -53,7 +58,7 @@ class Bitset
//// CONSTRUCTORS ////

// Default construct with zeros for all bits
CELER_CONSTEXPR_FUNCTION Bitset() = default;
constexpr Bitset() = default;

// Construct implicitly from a bitset encoded as an integer
CELER_CONSTEXPR_FUNCTION Bitset(word_type value) noexcept;
Expand Down Expand Up @@ -149,6 +154,9 @@ class Bitset
// Create a mask for a given bit index
static CELER_CONSTEXPR_FUNCTION word_type mask(size_type pos) noexcept;

// Create a negative mask for a given bit index
static CELER_CONSTEXPR_FUNCTION word_type neg_mask(size_type pos) noexcept;

// Get the word for a given bit position
CELER_CONSTEXPR_FUNCTION word_type get_word(size_type pos) const
noexcept(!CELERITAS_DEBUG);
Expand Down Expand Up @@ -185,9 +193,9 @@ class Bitset<N>::reference
{
}

CELER_CONSTEXPR_FUNCTION reference(reference const&) = default;
constexpr reference(reference const&) = default;

CELER_FUNCTION ~reference() noexcept = default;
~reference() noexcept = default;

//! Assignment for b[i] = x;
CELER_CONSTEXPR_FUNCTION
Expand All @@ -199,7 +207,7 @@ class Bitset<N>::reference
}
else
{
*word_pointer_ &= ~Bitset::mask(bit_pos_);
*word_pointer_ &= Bitset::neg_mask(bit_pos_);
}
return *this;
}
Expand All @@ -216,7 +224,7 @@ class Bitset<N>::reference
}
else
{
*word_pointer_ &= ~Bitset::mask(bit_pos_);
*word_pointer_ &= Bitset::neg_mask(bit_pos_);
}
}
return *this;
Expand Down Expand Up @@ -313,15 +321,16 @@ CELER_CONSTEXPR_FUNCTION bool Bitset<N>::all() const noexcept
{
for (size_type i = 0; i < num_words_ - 1; ++i)
{
if (words_[i] != ~word_type(0))
if (words_[i] != static_cast<word_type>(~word_type(0)))
{
return false;
}
}

// Only compare the last word up to the last bit of the bitset
return this->last_word()
== (~word_type(0) >> (num_words_ * bits_per_word_ - N));
== (static_cast<word_type>(~word_type(0))
>> (num_words_ * bits_per_word_ - N));
}

//---------------------------------------------------------------------------//
Expand Down Expand Up @@ -416,7 +425,7 @@ CELER_CONSTEXPR_FUNCTION Bitset<N>& Bitset<N>::set() noexcept
{
for (size_type i = 0; i < num_words_; ++i)
{
words_[i] = ~word_type(0);
words_[i] = static_cast<word_type>(~word_type(0));
}

// Clear unused bits on the last word
Expand Down Expand Up @@ -456,7 +465,7 @@ CELER_CONSTEXPR_FUNCTION Bitset<N>&
Bitset<N>::reset(size_type pos) noexcept(!CELERITAS_DEBUG)
{
CELER_EXPECT(pos < N);
this->get_word(pos) &= ~Bitset::mask(pos);
this->get_word(pos) &= Bitset::neg_mask(pos);
return *this;
}

Expand Down Expand Up @@ -512,6 +521,19 @@ Bitset<N>::mask(size_type pos) noexcept -> word_type
return word_type(1) << Bitset::which_bit(pos);
}

//---------------------------------------------------------------------------//
/*!
 * Build the complement of a single-bit mask: every bit of the word is set
 * except the one selected by \c pos.
 *
 * Applying \c ~ to a word type narrower than \c int yields a promoted
 * \c int, so the result is explicitly narrowed back to \c word_type here
 * rather than at each call site.
 */
template<size_type N>
CELER_CONSTEXPR_FUNCTION auto
Bitset<N>::neg_mask(size_type pos) noexcept -> word_type
{
    // Invert the one-hot mask, then drop any bits introduced by promotion
    return static_cast<word_type>(~Bitset::mask(pos));
}

//---------------------------------------------------------------------------//
//! Get the word for a given bit position
template<size_type N>
Expand Down Expand Up @@ -556,7 +578,8 @@ CELER_CONSTEXPR_FUNCTION void Bitset<N>::sanitize() noexcept
constexpr size_type extra_bits = N % bits_per_word_;
if constexpr (extra_bits != 0)
{
this->last_word() &= ~((~word_type(0)) << extra_bits);
this->last_word() &= static_cast<word_type>(
~(static_cast<word_type>(~word_type(0)) << extra_bits));
}
}

Expand Down
27 changes: 22 additions & 5 deletions src/corecel/math/Algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -738,18 +738,35 @@ CELER_FORCEINLINE_FUNCTION void sincospi(double a, double* s, double* c)
/*!
 * Count the number of set bits in an unsigned integer.
 *
 * \tparam T unsigned integral type of at most 64 bits
 * \param x value whose set bits are counted
 * \return number of bits equal to one in \c x
 *
 * Types of up to 32 bits are dispatched to the 32-bit intrinsic (narrower
 * values are zero-extended, which does not change the count); wider types use
 * the 64-bit intrinsic. The function is not \c constexpr when compiled with
 * MSVC.
 */
template<class T>
#if defined(_MSC_VER)
inline int popcount(T x) noexcept
#else
inline constexpr int popcount(T x) noexcept
#endif
{
    static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
                  "popcount is only defined for unsigned integral types");
    static_assert(sizeof(T) <= 8,
                  "popcount is only defined for integers of at most 64 bits");

    if constexpr (sizeof(T) <= 4)
    {
#if CELER_DEVICE_COMPILE
        return __popc(x);
#elif defined(_MSC_VER)
        return __popcnt(x);
#else
        return __builtin_popcount(x);
#endif
    }
    else
    {
        // Keeping this in the 'else' branch prevents the 64-bit intrinsic
        // from being instantiated for narrower types.
#if CELER_DEVICE_COMPILE
        return __popcll(x);
#elif defined(_MSC_VER)
        return __popcnt64(x);
#else
        // 'unsigned long' may be only 32 bits wide (e.g. ILP32 targets), so
        // use the 'long long' variant to avoid truncating 64-bit inputs.
        return __builtin_popcountll(x);
#endif
    }
}

Expand Down

0 comments on commit 6ec7f84

Please sign in to comment.