Skip to content

Commit

Permalink
Fixed an issue with an empty index and a non-zero roots size; alignment fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Kazak Sergey committed May 30, 2023
1 parent 0ba04a7 commit fde86b2
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 17 deletions.
10 changes: 5 additions & 5 deletions examples/packed_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,15 @@ int main(int argc, char **argv) {
{
// DotProduct
test<DotProductPacked16>(256, 256, 100000);
test<DotProductPacked16>(64, 128, 1000000);
test<DotProductPacked16>(64, 64, 1000000);
// and a hard case for AVX: causes a split
test<DotProductPacked16>(40, 64, 100000);
test<DotProductPacked16>(40, 40, 100000);
// Euclidean
test<EuclideanPacked16>(256, 256, 100000);
test<EuclideanPacked16>(64, 128, 1000000);
test<EuclideanPacked16>(64, 64, 1000000);
// and a hard case for AVX: causes a split
test<EuclideanPacked16>(40, 64, 100000);
CHECK_AND_THROW( in_mem_test(64, 128, 100000) > 0.9 );
test<EuclideanPacked16>(40, 40, 100000);
CHECK_AND_THROW( in_mem_test(64, 64, 100000) > 0.9 );
// the case where we try to build a very small index
CHECK_AND_THROW( in_mem_test(64, 64, 17) >= 0.25 );
// edge cases
Expand Down
31 changes: 23 additions & 8 deletions src/packedlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include <assert.h>

#ifdef __GNUC__
# define alloca_aligned(sz) static_cast<char*>(__builtin_alloca_with_align(sz, 16))
# define alloca_aligned(sz) static_cast<char*>(__builtin_alloca_with_align(sz, 64))
#else
/* Clang is expected to generate an already aligned stack allocation */
# define alloca_aligned(sz) static_cast<char*>(alloca(sz))
Expand Down Expand Up @@ -91,7 +91,10 @@ namespace detail {
MMapWriter& operator == ( MMapWriter const & ) = delete;
bool open( char const */*filename*/, size_t calculated_size )
{
void *p = mmap(0, calculated_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);

void *p = calculated_size ? mmap(0, calculated_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0)
: nullptr;
if( p == MAP_FAILED )
return false;

Expand All @@ -101,7 +104,8 @@ namespace detail {

#if defined(MADV_DONTDUMP)
// Exclude these pages from core dumps
madvise(p, calculated_size, MADV_DONTDUMP);
if (p != nullptr)
madvise(p, calculated_size, MADV_DONTDUMP);
#endif
return true;
}
Expand Down Expand Up @@ -227,18 +231,27 @@ class PackedAnnoyIndexer {

_n_nodes = _n_items;
while (1) {
if (q == -1 && _n_nodes >= _n_items * 2)
break;
if (q != -1 && _roots.size() >= (size_t)q)
if (q == -1)
{
if (_n_nodes >= _n_items * 2)
break;
}
else if (_roots.size() >= (size_t)q)
break;

if (_verbose) annoylib_showUpdate("pass %zd...\n", _roots.size());


vector<S> indices;
for (S i = 0; i < _n_items; i++) {
if (_get(i)->n_descendants >= 1) // Issue #223
indices.push_back(i);
}

// cannot make roots without items
if( indices.empty() )
break;

_roots.push_back(_make_tree(indices, true));
}

Expand Down Expand Up @@ -436,7 +449,7 @@ class PackedAnnoyIndexer {
S const max_n_descendants = _K - 1;

if (isz <= max_n_descendants && (!is_root || (size_t)_n_items <= (size_t)max_n_descendants || isz == 1)) {
if( !is_root )
if (!is_root)
// only non-roots can have indices-only nodes!
return _append_indices(indices);

Expand Down Expand Up @@ -598,6 +611,8 @@ class PackedAnnoySearcher {

size_t n_nodes = (S)((_mapping.size - sizeof_indices) / _s);



// Find the roots by scanning the end of the file and taking the nodes with most descendants
std::vector<S> roots;
roots.clear();
Expand All @@ -623,7 +638,7 @@ class PackedAnnoySearcher {
_roots_q.emplace_back(Distance::template pq_initial_value<T>(), r);
}
std::make_heap(_roots_q.begin(), _roots_q.end());
_n_items = m;
_n_items = m != -1 ? m : 0;

return true;
}
Expand Down
8 changes: 4 additions & 4 deletions src/packutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ float decode_and_dot_i16_f32( uint16_t const *__restrict__ in, float const *__re
__m256i s = _mm256_lddqu_si256( (__m256i const*)(in) );
__m256i ai = _mm256_srai_epi32(_mm256_unpacklo_epi16(s, s), 16);
__m256 a = _mm256_mul_ps(_mm256_cvtepi32_ps(ai), mm1);
mx = _mm256_load_ps(y);
mx = _mm256_loadu_ps(y);
__m256i bi = _mm256_srai_epi32(_mm256_unpackhi_epi16(s, s), 16);
msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (a, mx));
__m256 b = _mm256_mul_ps(_mm256_cvtepi32_ps(bi), mm1);
my = _mm256_load_ps(y + 8);
my = _mm256_loadu_ps(y + 8);
msum2 = _mm256_add_ps (msum2, _mm256_mul_ps (b, my));
in += 16;
y += 16;
Expand Down Expand Up @@ -159,12 +159,12 @@ inline float decode_and_euclidean_distance_i16_f32( uint16_t const *__restrict__
__m256i s = _mm256_lddqu_si256( (__m256i const*)(in) );
__m256i ai = _mm256_srai_epi32(_mm256_unpacklo_epi16(s, s), 16);
__m256 a = _mm256_mul_ps(_mm256_cvtepi32_ps(ai), mm1);
mx = _mm256_load_ps(y);
mx = _mm256_loadu_ps(y);
__m256i bi = _mm256_srai_epi32(_mm256_unpackhi_epi16(s, s), 16);
__m256 d1 = _mm256_sub_ps (a, mx);
msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (d1, d1));
__m256 b = _mm256_mul_ps(_mm256_cvtepi32_ps(bi), mm1);
my = _mm256_load_ps(y + 8);
my = _mm256_loadu_ps(y + 8);
__m256 d2 = _mm256_sub_ps (b, my);
msum2 = _mm256_add_ps (msum2, _mm256_mul_ps (d2, d2));
in += 16;
Expand Down

0 comments on commit fde86b2

Please sign in to comment.