Skip to content

Commit

Permalink
attempt to vectorize hash calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
Gillgamesh committed Sep 6, 2024
1 parent 35211d2 commit 65da85a
Show file tree
Hide file tree
Showing 13 changed files with 3,591 additions and 40 deletions.
23 changes: 16 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,16 @@ if (BUILD_BENCH)
FetchContent_MakeAvailable(benchmark)
endif()

FetchContent_MakeAvailable(GutterTree StreamingUtilities)
# Get libcuckoo (concurrent hash table)
# NOTE(review): GIT_TAG master follows a moving branch, so the fetched sources
# can differ from one configure to the next; consider pinning a release tag or
# a full commit hash once a known-good revision is chosen.

FetchContent_Declare(
libcuckoo

GIT_REPOSITORY https://github.com/efficient/libcuckoo
GIT_TAG master
)

FetchContent_MakeAvailable(GutterTree StreamingUtilities libcuckoo )

# AVAILABLE COMPILATION DEFINITIONS:
# VERIFY_SAMPLES_F Use a deterministic connected-components
Expand All @@ -101,9 +110,9 @@ add_library(GraphZeppelin
src/cc_alg_configuration.cpp
src/sketch.cpp
src/util.cpp)
add_dependencies(GraphZeppelin GutterTree StreamingUtilities)
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities)
target_include_directories(GraphZeppelin PUBLIC include/)
add_dependencies(GraphZeppelin GutterTree StreamingUtilities libcuckoo)
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities libcuckoo)
target_include_directories(GraphZeppelin PUBLIC include/ include/libcuckoo/)
target_compile_options(GraphZeppelin PUBLIC -fopenmp)
target_link_options(GraphZeppelin PUBLIC -fopenmp)
target_compile_definitions(GraphZeppelin PUBLIC XXH_INLINE_ALL)
Expand All @@ -116,9 +125,9 @@ add_library(GraphZeppelinVerifyCC
src/sketch.cpp
src/util.cpp
test/util/graph_verifier.cpp)
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities)
target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities)
target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/)
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities libcuckoo)
target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities libcuckoo)
target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/libcuckoo/ include/test/)
target_compile_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)
target_link_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)
target_compile_definitions(GraphZeppelinVerifyCC PUBLIC XXH_INLINE_ALL VERIFY_SAMPLES_F)
Expand Down
55 changes: 52 additions & 3 deletions include/bucket.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,39 @@ namespace Bucket_Boruvka {
* @param max_depth The maximum depth to return
* @return The hash of update_idx using seed_and_col as a seed.
*/
inline static col_hash_t get_index_depth(const vec_t update_idx, const long seed_and_col,
inline static col_hash_t get_index_depth(const vec_t update_idx, const long seed, const long col,
const vec_hash_t max_depth);

inline static void get_all_index_depths(
const vec_t update_idx,
uint32_t *depths_buffer,
const long seed,
const long num_columns,
const vec_hash_t max_depth
) {
XXH128_hash_t *hashes = (XXH128_hash_t*) depths_buffer;
#pragma omp simd
for (int col = 0; col < num_columns -4; col+=4) {
auto hash = XXH3_128bits_withSeed(&update_idx, sizeof(vec_t), seed + 5 * (col / 4) );
hashes[col / 4] = hash;
}
for (int col = 0; col< num_columns - 4; col+=4) {
auto hash = hashes[col / 4];
// auto hash = XXH3_128bits_withSeed(&update_idx, sizeof(vec_t), seed + 5 * (col / 4) );
depths_buffer[col] = (uint32_t) (hash.low64 >> 32);
depths_buffer[col+1] = (uint32_t) (hash.low64 & 0xFFFFFFFF);
depths_buffer[col+2] = (uint32_t) (hash.high64 >> 32);
depths_buffer[col+3] = (uint32_t) (hash.high64 & 0xFFFFFFFF);
}
for (int col = num_columns - (num_columns % 4); col < num_columns; col++) {
depths_buffer[col] = get_index_depth(update_idx, seed, col, max_depth);
}
for (int col = 0; col < num_columns; col++) {
depths_buffer[col] |= (1ull << max_depth); // assert not > max_depth by ORing
depths_buffer[col] = __builtin_ctzll(depths_buffer[col]);
}
}

/**
* Hashes the index for checksumming
* This is used to as a parameter to Bucket::update
Expand Down Expand Up @@ -71,9 +101,28 @@ inline bool Bucket_Boruvka::is_empty(const Bucket &bucket) {
return (bucket.alpha | bucket.gamma) == 0;
}

// Returns the depth of update_idx for one column: hashes update_idx with a
// 128-bit XXH3 hash, picks one 32-bit word of that hash based on col, and
// returns the number of trailing zero bits of that word after bit max_depth
// has been forced on.
//
// NOTE(review): this page shows a diff without +/- markers. The first of the
// two signature lines below (seed_and_col) appears to be the removed side and
// the second (seed, col) the added side -- confirm against the repository.
inline col_hash_t Bucket_Boruvka::get_index_depth(const vec_t update_idx, const long seed_and_col,
inline col_hash_t Bucket_Boruvka::get_index_depth(const vec_t update_idx, const long seed, const long col,
const vec_hash_t max_depth) {
// NOTE(review): the next line uses seed_and_col and declares depth_hash, which
// is declared again further down; it looks like the removed side of the diff.
col_hash_t depth_hash = XXH3_128bits_withSeed(&update_idx, sizeof(vec_t), seed_and_col).high64;
// The seed depends on col / 4, so four consecutive columns share one hash.
auto hash = XXH3_128bits_withSeed(&update_idx, sizeof(vec_t), seed + 5 * (col / 4) );
// auto hash = XXH3_128bits_withSeed(&update_idx, sizeof(vec_t), seed + 5 * (col) );
col_hash_t depth_hash = 0;
// col % 4 selects which 32-bit word of the shared 128-bit hash this column uses.
int offset = col % 4;
switch (offset) {
case 0:
// upper 32 bits of low64
depth_hash = (uint32_t) (hash.low64 >> 32);
break;
case 1:
// lower 32 bits of low64
depth_hash = (uint32_t) (hash.low64 & 0xFFFFFFFF);
break;
case 2:
// upper 32 bits of high64
depth_hash = (uint32_t) (hash.high64 >> 32);
break;
case 3:
// lower 32 bits of high64
depth_hash = (uint32_t) (hash.high64 & 0xFFFFFFFF);
break;
}
// std::cout << "hash " << hash.low64 << " " << hash.high64 << " " << depth_hash << std::endl;
// col_hash_t depth_hash = hash.low64;
// Setting bit max_depth bounds the trailing-zero count by max_depth and keeps
// the argument of __builtin_ctzll non-zero.
depth_hash |= (1ull << max_depth); // assert not > max_depth by ORing
return __builtin_ctzll(depth_hash);
}
Expand Down
2 changes: 2 additions & 0 deletions include/cc_sketch_alg.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "sketch.h"
#include "dsu.h"

#include "cuckoohash_map.hh"

#ifdef VERIFY_SAMPLES_F
#include "test/graph_verifier.h"
#endif
Expand Down
Loading

0 comments on commit 65da85a

Please sign in to comment.