-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbloom.h
100 lines (89 loc) · 2.04 KB
/
bloom.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/*********************************************************
File Name:bloom_filter.h
Author: Abby Cin
Mail: [email protected]
Created Time: Sun 23 Aug 2020 03:51:01 PM CST
**********************************************************/
#ifndef BLOOM_FILTER_H
#define BLOOM_FILTER_H
#include <cmath>
#include <functional>
#include <string_view>
#include <vector>
#include "MurmurHash3.h"
#include <iostream>
class BloomFilter {
public:
template<typename... Args>
static void print(Args &&...args)
{
((std::cout << args << ' '), ...);
std::cout << '\n';
}
// n: collection size
// r: expect false positive ratio
BloomFilter(size_t n, double r)
{
n_ = n;
nb_ = -1 * (n_ * std::log(r)) / std::pow(std::log(2), 2);
k_ = std::ceil(std::log(2) * nb_ / n_);
init();
print("size:", bits_.size(), nb_);
}
[[nodiscard]] double estimate() const
{
return std::pow(1 - std::exp(-((double)n_ * k_ / nb_)), k_);
}
void add(std::string_view x)
{
for (auto &f : hash_) {
auto h = f(x) % nb_;
size_t span = h / width_;
size_t slot = h % width_;
bits_[span] |= slot;
}
}
bool test(std::string_view x)
{
for (auto &f : hash_) {
auto h = f(x) % nb_;
size_t span = h / width_;
size_t slot = h % width_;
if (!(bits_[span] & slot)) {
return false;
}
}
return true;
}
private:
using hash_func = std::function<int64_t(std::string_view)>;
constexpr static size_t width_ = sizeof(uint64_t) * 8;
size_t n_;
size_t nb_;
size_t k_;
std::vector<uint64_t> bits_;
std::vector<hash_func> hash_;
void init()
{
bits_.resize(nb_ / width_ + 1); // ceil
for (size_t i = 0; i < k_; ++i) {
hash_.emplace_back(
[i](std::string_view x) -> uint64_t
{
uint32_t h = 0;
uint32_t l = 0;
hash(x, &h, &l);
uint64_t r = h;
r <<= sizeof(uint32_t);
r |= (i + 1) * l;
return r;
});
}
}
static void hash(std::string_view x, uint32_t *h, uint32_t *l)
{
MurmurHash3_x86_32(x.data(), x.size(), 7, l);
MurmurHash3_x86_32(x.data(), x.size(), 17, h);
}
};
#endif // BLOOM_FILTER_H