Added bloom_filter with example and benchmarks.

NVIDIA · Aug 9, 2021 · 1261426 · 1261426
1 parent b1fea0c
commit 1261426
Show file tree

Hide file tree

Showing 8 changed files with 1,195 additions and 5 deletions.
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -12,10 +12,19 @@ CPMAddPackage(
     "RUN_HAVE_STD_REGEX 0" #
 )
 
-if (benchmark_ADDED)
-  # patch google benchmark target
-  set_target_properties(benchmark PROPERTIES CXX_STANDARD 14)
-endif()
+# NVBench is linked by the targets created through ConfigureNVBench (nvbench::main).
+# NOTE(review): GIT_TAG main presumably follows a moving branch; consider pinning a
+# commit or release tag so that benchmark builds stay reproducible.
+CPMAddPackage(
+  NAME nvbench
+  GITHUB_REPOSITORY NVIDIA/nvbench
+  GIT_TAG main
+  GIT_SHALLOW TRUE
+)
+
+# Only patch the google benchmark target when CPM actually added it; otherwise there
+# may be no `benchmark` target in this build to set properties on.
+if (benchmark_ADDED)
+  set_target_properties(benchmark PROPERTIES CXX_STANDARD 17)
+endif()
 
 ###################################################################################################
 # - compiler function -----------------------------------------------------------------------------
@@ -24,7 +33,7 @@ function(ConfigureBench BENCH_NAME BENCH_SRC)
     add_executable(${BENCH_NAME} "${BENCH_SRC}")
     set_target_properties(${BENCH_NAME} PROPERTIES
                                         POSITION_INDEPENDENT_CODE ON
-                                        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks")
+                                        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
     target_include_directories(${BENCH_NAME} PRIVATE
                                              "${CMAKE_CURRENT_SOURCE_DIR}")
     target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -Xcompiler -Wno-subobject-linkage)
@@ -35,6 +44,23 @@ function(ConfigureBench BENCH_NAME BENCH_SRC)
                                         CUDA::cudart)
 endfunction(ConfigureBench)
 
+###################################################################################################
+# Creates an NVBench-based benchmark executable.
+#
+#   BENCH_NAME  name of the executable target to create
+#   BENCH_SRC   source file(s) compiled into the target
+#
+# The binary is placed in "${CMAKE_BINARY_DIR}/benchmarks" and is linked against
+# nvbench::main, pthread and cuco.
+#
+# Example:
+#   ConfigureNVBench(MY_BENCH "${CMAKE_CURRENT_SOURCE_DIR}/my_dir/my_bench.cu")
+function(ConfigureNVBench BENCH_NAME BENCH_SRC)
+    add_executable(${BENCH_NAME} "${BENCH_SRC}")
+    set_target_properties(${BENCH_NAME} PROPERTIES
+                                        POSITION_INDEPENDENT_CODE ON
+                                        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
+    # NVBENCH_MODULE is passed as a target-scoped compile definition instead of
+    # going through the deprecated COMPILE_FLAGS property.
+    target_compile_definitions(${BENCH_NAME} PRIVATE NVBENCH_MODULE)
+    target_include_directories(${BENCH_NAME} PRIVATE
+                                             "${CMAKE_CURRENT_SOURCE_DIR}")
+    target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
+    target_link_libraries(${BENCH_NAME} PRIVATE
+                                        nvbench::main
+                                        pthread
+                                        cuco)
+endfunction(ConfigureNVBench)
+
 ###################################################################################################
 ### test sources ##################################################################################
 ###################################################################################################
@@ -50,3 +76,7 @@ ConfigureBench(STATIC_MAP_BENCH "${STATIC_MAP_BENCH_SRC}")
 ###################################################################################################
 set(RBK_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/reduce_by_key/reduce_by_key.cu")
 ConfigureBench(RBK_BENCH "${RBK_BENCH_SRC}")
+
+###################################################################################################
+# Bloom filter benchmark; configured through ConfigureNVBench rather than ConfigureBench.
+set(BLOOM_FILTER_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/bloom_filter_bench.cu")
+ConfigureNVBench(BLOOM_FILTER_BENCH "${BLOOM_FILTER_BENCH_SRC}")
diff --git a/benchmarks/bloom_filter/bloom_filter_bench.cu b/benchmarks/bloom_filter/bloom_filter_bench.cu
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/sequence.h>
+#include <cuco/bloom_filter.cuh>
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief A benchmark evaluating insert performance.
+ *
+ * Before the timed runs, the false-positive rate of the given configuration is measured once
+ * and attached to the benchmark state as a summary: the first `NumInputs` keys are inserted
+ * and the following `NumInputs` keys, which were never inserted, are queried.
+ *
+ * @tparam Key Type of the keys inserted into the filter
+ * @tparam Slot Slot type forwarded to `cuco::bloom_filter`
+ * @param state NVBench state providing the `NumInputs`, `NumBits` and `NumHashes` axes
+ */
+template <typename Key, typename Slot>
+void nvbench_cuco_bloom_filter_insert(nvbench::state& state, nvbench::type_list<Key, Slot>)
+{
+  using filter_type =
+    cuco::bloom_filter<Key, cuda::thread_scope_device, cuco::cuda_allocator<char>, Slot>;
+
+  auto const num_keys   = state.get_int64("NumInputs");
+  auto const num_bits   = state.get_int64("NumBits");
+  auto const num_hashes = state.get_int64("NumHashes");
+
+  // Twice as many keys as inserted: the first half is inserted (true positives), the second
+  // half is only queried (true negatives).
+  thrust::device_vector<Key> keys(num_keys * 2);
+  thrust::sequence(keys.begin(), keys.end(), 1);
+
+  auto tp_begin = keys.begin();
+  auto tp_end   = tp_begin + num_keys;
+
+  {  // determine false-positive rate
+    auto tn_begin = tp_end;
+    auto tn_end   = keys.end();
+
+    filter_type filter(num_bits, num_hashes);
+    filter.insert(tp_begin, tp_end);
+
+    thrust::device_vector<bool> result(num_keys, false);
+    filter.contains(tn_begin, tn_end, result.begin());
+
+    // Keep the count integral and divide in double precision: a float cannot represent every
+    // integer above 2^24, which is far below the configured number of inputs.
+    auto const num_fp = thrust::count(thrust::device, result.begin(), result.end(), true);
+
+    auto& summ = state.add_summary("False-Positive Rate");
+    summ.set_string("hint", "FPR");
+    summ.set_string("short_name", "FPR");
+    summ.set_string("description", "False-positive rate of the bloom filter.");
+    summ.set_float64("value", static_cast<double>(num_fp) / static_cast<double>(num_keys));
+  }
+
+  state.add_element_count(num_keys);
+  state.add_global_memory_writes<Key>(num_keys);
+
+  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+             [&](nvbench::launch& launch, auto& timer) {
+               // A fresh filter is constructed for every measurement; only the insert call
+               // sits between timer.start() and timer.stop().
+               filter_type filter(num_bits, num_hashes);
+
+               timer.start();
+               filter.insert(tp_begin, tp_end, launch.get_stream());
+               timer.stop();
+             });
+}
+
+/**
+ * @brief A benchmark evaluating contains (lookup) performance.
+ *
+ * The first `NumInputs` keys are inserted into the filter. The following `NumInputs` keys,
+ * which were never inserted, are queried once to report the false-positive rate as a summary.
+ * The timed region then queries the keys that were inserted.
+ *
+ * @tparam Key Type of the keys inserted into and queried from the filter
+ * @tparam Slot Slot type forwarded to `cuco::bloom_filter`
+ * @param state NVBench state providing the `NumInputs`, `NumBits` and `NumHashes` axes
+ */
+template <typename Key, typename Slot>
+void nvbench_cuco_bloom_filter_contains(nvbench::state& state, nvbench::type_list<Key, Slot>)
+{
+  using filter_type =
+    cuco::bloom_filter<Key, cuda::thread_scope_device, cuco::cuda_allocator<char>, Slot>;
+
+  auto const num_keys   = state.get_int64("NumInputs");
+  auto const num_bits   = state.get_int64("NumBits");
+  auto const num_hashes = state.get_int64("NumHashes");
+
+  // Twice as many keys as inserted: the first half is inserted (true positives), the second
+  // half is only queried (true negatives).
+  thrust::device_vector<Key> keys(num_keys * 2);
+  thrust::sequence(thrust::device, keys.begin(), keys.end(), 1);
+
+  auto tp_begin = keys.begin();
+  auto tp_end   = tp_begin + (num_keys);
+
+  auto tn_begin = tp_end;
+  auto tn_end   = keys.end();
+
+  filter_type filter(num_bits, num_hashes);
+  filter.insert(tp_begin, tp_end);
+
+  // determine false-positive rate
+  thrust::device_vector<bool> result(num_keys, false);
+  filter.contains(tn_begin, tn_end, result.begin());
+
+  // Keep the count integral and divide in double precision: a float cannot represent every
+  // integer above 2^24, which is far below the configured number of inputs.
+  auto const num_fp = thrust::count(thrust::device, result.begin(), result.end(), true);
+
+  auto& summ = state.add_summary("False-Positive Rate");
+  summ.set_string("hint", "FPR");
+  summ.set_string("short_name", "FPR");
+  summ.set_string("description", "False-positive rate of the bloom filter.");
+  summ.set_float64("value", static_cast<double>(num_fp) / static_cast<double>(num_keys));
+
+  state.add_element_count(num_keys);
+  state.add_global_memory_reads<Key>(num_keys);
+
+  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+             [&](nvbench::launch& launch, auto& timer) {
+               // The already populated filter is reused; only the lookup of the inserted
+               // keys sits between timer.start() and timer.stop().
+               timer.start();
+               filter.contains(tp_begin, tp_end, result.begin(), launch.get_stream());
+               timer.stop();
+             });
+}
+
+// type parameter dimensions for benchmark
+using key_type_range  = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
+using slot_type_range = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
+
+// benchmark setups
+
+// Insert benchmark: every combination of key type, slot type, filter size and hash count.
+NVBENCH_BENCH_TYPES(nvbench_cuco_bloom_filter_insert,
+                    NVBENCH_TYPE_AXES(key_type_range, slot_type_range))
+  .set_name("nvbench_cuco_bloom_filter_insert")
+  .set_type_axes_names({"Key", "Slot"})
+  .set_max_noise(3)                            // Custom noise: 3%. By default: 0.5%.
+  .add_int64_axis("NumInputs", {100'000'000})  // Number of keys inserted into the filter
+  .add_int64_axis("NumBits", {10'000'000'000, 100'000'000'000})  // Filter size in bits
+  .add_int64_axis("NumHashes", nvbench::range(2, 10, 2));
+
+// Contains benchmark: same type and size axes as above, with a different hash-count range.
+NVBENCH_BENCH_TYPES(nvbench_cuco_bloom_filter_contains,
+                    NVBENCH_TYPE_AXES(key_type_range, slot_type_range))
+  .set_name("nvbench_cuco_bloom_filter_contains")
+  .set_type_axes_names({"Key", "Slot"})
+  .set_max_noise(3)                            // Custom noise: 3%. By default: 0.5%.
+  .add_int64_axis("NumInputs", {100'000'000})  // Number of keys inserted into the filter
+  .add_int64_axis("NumBits", {10'000'000'000, 100'000'000'000})
+  .add_int64_axis("NumHashes", nvbench::range(6, 12, 2));
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -18,3 +18,4 @@ endfunction(ConfigureExample)
 ###################################################################################################
 
 ConfigureExample(STATIC_MAP_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/static_map_example.cu")
+ConfigureExample(BLOOM_FILTER_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/bloom_filter_example.cu")
diff --git a/examples/bloom_filter/bloom_filter_example.cu b/examples/bloom_filter/bloom_filter_example.cu
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/sequence.h>
+#include <iostream>
+
+#include <cuco/bloom_filter.cuh>
+
+/**
+ * @brief Inserts half of a key sequence into a bloom filter and prints the observed
+ * true-positive and false-positive rates.
+ */
+int main(void)
+{
+  // Generate 10'000 keys and insert the first 5'000 into the filter.
+  int const num_keys = 10'000;
+  int const num_tp   = num_keys / 2;  // integer arithmetic; no round trip through double
+  int const num_tn   = num_keys - num_tp;
+
+  // Spawn a filter with 10 bits per inserted key (50'000 bits in total) and 6 hashes per key.
+  cuco::bloom_filter<int> filter{num_tp * 10, 6};
+
+  thrust::device_vector<int> keys(num_keys);
+  thrust::sequence(keys.begin(), keys.end(), 1);
+
+  auto tp_begin = keys.begin();
+  auto tp_end   = tp_begin + num_tp;
+  auto tn_begin = tp_end;
+  auto tn_end   = keys.end();
+
+  // Insert the first half of the keys.
+  filter.insert(tp_begin, tp_end);
+
+  thrust::device_vector<bool> tp_result(num_tp, false);
+  thrust::device_vector<bool> tn_result(num_tn, false);
+
+  // Query the filter for the previously inserted keys.
+  // This should result in a true-positive rate of TPR=1.
+  filter.contains(tp_begin, tp_end, tp_result.begin());
+
+  // Query the filter for the keys that are not present in the filter.
+  // Since bloom filters are probabilistic data structures, the filter
+  // exhibits a false-positive rate FPR>0 depending on the number of bits in
+  // the filter and the number of hashes used per key.
+  filter.contains(tn_begin, tn_end, tn_result.begin());
+
+  float tp_rate =
+    float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp);
+  float fp_rate =
+    float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn);
+
+  std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl;
+
+  return 0;
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -18,3 +18,4 @@ endfunction(ConfigureExample)
		###################################################################################################

		ConfigureExample(STATIC_MAP_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/static_map_example.cu")
		ConfigureExample(BLOOM_FILTER_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/bloom_filter_example.cu")