-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
58 changed files
with
5,319 additions
and
5,429 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
Language: Cpp | ||
# BasedOnStyle: LLVM | ||
AccessModifierOffset: -2 | ||
AlignAfterOpenBracket: Align | ||
AlignConsecutiveAssignments: false | ||
AlignConsecutiveDeclarations: false | ||
AlignEscapedNewlines: DontAlign | ||
AlignOperands: true | ||
AlignTrailingComments: false | ||
AllowAllParametersOfDeclarationOnNextLine: true | ||
AllowShortBlocksOnASingleLine: false | ||
AllowShortCaseLabelsOnASingleLine: false | ||
AllowShortFunctionsOnASingleLine: All | ||
AllowShortIfStatementsOnASingleLine: false | ||
AllowShortLoopsOnASingleLine: false | ||
AlwaysBreakAfterDefinitionReturnType: None | ||
AlwaysBreakAfterReturnType: None | ||
AlwaysBreakBeforeMultilineStrings: false | ||
AlwaysBreakTemplateDeclarations: Yes | ||
BinPackArguments: true | ||
BinPackParameters: true | ||
BreakBeforeBraces: Allman | ||
BraceWrapping: | ||
AfterClass: true | ||
AfterControlStatement: true | ||
AfterEnum: true | ||
AfterFunction: true | ||
AfterNamespace: true | ||
AfterObjCDeclaration: true | ||
AfterStruct: true | ||
AfterUnion: true | ||
AfterExternBlock: true | ||
BeforeCatch: true | ||
BeforeElse: true | ||
IndentBraces: true | ||
SplitEmptyFunction: false | ||
SplitEmptyRecord: false | ||
SplitEmptyNamespace: false | ||
BreakBeforeBinaryOperators: None | ||
BreakBeforeInheritanceComma: false | ||
BreakInheritanceList: BeforeColon | ||
BreakBeforeTernaryOperators: true | ||
BreakConstructorInitializersBeforeComma: false | ||
BreakConstructorInitializers: BeforeColon | ||
BreakAfterJavaFieldAnnotations: false | ||
BreakStringLiterals: true | ||
ColumnLimit: 120 | ||
CommentPragmas: '^ (NOTE|WARNING|TODO|FIXME):' | ||
CompactNamespaces: false | ||
ConstructorInitializerAllOnOneLineOrOnePerLine: false | ||
ConstructorInitializerIndentWidth: 4 | ||
ContinuationIndentWidth: 4 | ||
Cpp11BracedListStyle: true | ||
DerivePointerAlignment: false | ||
DisableFormat: false | ||
FixNamespaceComments: false | ||
ForEachMacros: | ||
- foreach | ||
- Q_FOREACH | ||
- BOOST_FOREACH | ||
IncludeBlocks: Preserve | ||
IncludeCategories: | ||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/' | ||
Priority: 2 | ||
- Regex: '^(<|"(gtest|gmock|isl|json)/)' | ||
Priority: 3 | ||
- Regex: '.*' | ||
Priority: 1 | ||
IncludeIsMainRegex: '(Test)?$' | ||
IndentCaseLabels: false | ||
IndentPPDirectives: None | ||
IndentWidth: 2 | ||
IndentWrappedFunctionNames: false | ||
JavaScriptQuotes: Leave | ||
JavaScriptWrapImports: true | ||
KeepEmptyLinesAtTheStartOfBlocks: true | ||
MacroBlockBegin: '' | ||
MacroBlockEnd: '' | ||
MaxEmptyLinesToKeep: 1 | ||
NamespaceIndentation: All | ||
ObjCBinPackProtocolList: Auto | ||
ObjCBlockIndentWidth: 2 | ||
ObjCSpaceAfterProperty: false | ||
ObjCSpaceBeforeProtocolList: true | ||
PenaltyBreakAssignment: 2 | ||
PenaltyBreakBeforeFirstCallParameter: 19 | ||
PenaltyBreakComment: 300 | ||
PenaltyBreakFirstLessLess: 120 | ||
PenaltyBreakString: 1000 | ||
PenaltyBreakTemplateDeclaration: 10 | ||
PenaltyExcessCharacter: 1000000 | ||
PenaltyReturnTypeOnItsOwnLine: 60 | ||
PointerAlignment: Left | ||
ReflowComments: true | ||
SortIncludes: false | ||
SortUsingDeclarations: false | ||
SpaceAfterCStyleCast: false | ||
SpaceAfterTemplateKeyword: false | ||
SpaceBeforeAssignmentOperators: true | ||
SpaceBeforeCpp11BracedList: false | ||
SpaceBeforeCtorInitializerColon: true | ||
SpaceBeforeInheritanceColon: true | ||
SpaceBeforeParens: Never | ||
SpaceBeforeRangeBasedForLoopColon: false | ||
SpaceInEmptyParentheses: false | ||
SpacesBeforeTrailingComments: 1 | ||
SpacesInAngles: false | ||
SpacesInContainerLiterals: false | ||
SpacesInCStyleCastParentheses: false | ||
SpacesInParentheses: false | ||
SpacesInSquareBrackets: false | ||
Standard: Cpp11 | ||
TabWidth: 2 | ||
UseTab: Never |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
cmake_minimum_required(VERSION 3.15) | ||
project(bloom) | ||
|
||
set(CMAKE_CXX_STANDARD 17) | ||
set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
set(CMAKE_CXX_EXTENSIONS OFF) | ||
|
||
include_directories(${PROJECT_SOURCE_DIR}/murmur3) | ||
|
||
add_subdirectory(murmur3) | ||
add_executable(bloom bloom.h main.cpp) | ||
target_link_libraries(bloom murmur3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
A naive Bloom filter implementation :cherry_blossom::rooster: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/********************************************************* | ||
File Name: bloom.h | ||
Author: Abby Cin | ||
Mail: [email protected] | ||
Created Time: Sun 23 Aug 2020 03:51:01 PM CST | ||
**********************************************************/ | ||
|
||
#ifndef BLOOM_FILTER_H | ||
#define BLOOM_FILTER_H | ||
|
||
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string_view>
#include <vector>

#include "MurmurHash3.h"
|
||
class BloomFilter | ||
{ | ||
public: | ||
template<typename... Args> | ||
static void print(Args&&... args) | ||
{ | ||
((std::cout << args << ' '), ...); | ||
std::cout << '\n'; | ||
} | ||
|
||
// n: collection size | ||
// r: expect false positive ratio | ||
BloomFilter(size_t n, double r) | ||
{ | ||
n_ = n; | ||
nb_ = -1 * (n_ * std::log(r)) / std::pow(std::log(2), 2); | ||
k_ = std::ceil(std::log(2) * nb_ / n_); | ||
init(); | ||
print("size:", bits_.size(), nb_); | ||
} | ||
|
||
[[nodiscard]] double estimate() const { return std::pow(1 - std::exp(-((double)n_ * k_ / nb_)), k_); } | ||
|
||
void add(std::string_view x) | ||
{ | ||
for(auto& f: hash_) | ||
{ | ||
auto h = f(x) % nb_; | ||
size_t span = h / width_; | ||
size_t slot = h % width_; | ||
bits_[span] |= slot; | ||
} | ||
} | ||
|
||
bool test(std::string_view x) | ||
{ | ||
for(auto& f: hash_) | ||
{ | ||
auto h = f(x) % nb_; | ||
size_t span = h / width_; | ||
size_t slot = h % width_; | ||
if(!(bits_[span] & slot)) | ||
{ | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
private: | ||
using hash_func = std::function<int64_t(std::string_view)>; | ||
constexpr static size_t width_ = sizeof(uint64_t) * 8; | ||
size_t n_; | ||
size_t nb_; | ||
size_t k_; | ||
std::vector<uint64_t> bits_; | ||
std::vector<hash_func> hash_; | ||
|
||
void init() | ||
{ | ||
bits_.resize(nb_ / width_ + 1); // ceil | ||
for(size_t i = 0; i < k_; ++i) | ||
{ | ||
hash_.emplace_back([i](std::string_view x) -> uint64_t { | ||
uint32_t h = 0; | ||
uint32_t l = 0; | ||
hash(x, &h, &l); | ||
uint64_t r = h; | ||
r <<= sizeof(uint32_t); | ||
r |= (i + 1) * l; | ||
return r; | ||
}); | ||
} | ||
} | ||
|
||
static void hash(std::string_view x, uint32_t* h, uint32_t* l) | ||
{ | ||
MurmurHash3_x86_32(x.data(), x.size(), 7, l); | ||
MurmurHash3_x86_32(x.data(), x.size(), 17, h); | ||
} | ||
}; | ||
|
||
#endif // BLOOM_FILTER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/********************************************************* | ||
File Name:main.cpp | ||
Author: Abby Cin | ||
Mail: [email protected] | ||
Created Time: Sun 23 Aug 2020 03:51:10 PM CST | ||
**********************************************************/ | ||
|
||
#include "bloom.h" | ||
|
||
int main() | ||
{ | ||
using namespace std::string_view_literals; | ||
using namespace std; | ||
BloomFilter bl{100, 0.001}; | ||
cout << bl.estimate() << '\n'; | ||
vector<string_view> vs; | ||
vs.emplace_back("are"); | ||
vs.emplace_back("are you"); | ||
vs.emplace_back("are you ok"); | ||
vs.emplace_back("are you ok?"); | ||
|
||
vector<string_view> ha; | ||
ha.emplace_back("are "); | ||
ha.emplace_back("are you "); | ||
ha.emplace_back("are you ok "); | ||
|
||
for(auto x: vs) | ||
{ | ||
bl.add(x); | ||
} | ||
|
||
cout << boolalpha; | ||
for(auto x: vs) | ||
{ | ||
BloomFilter::print(x, "=>", bl.test(x)); | ||
} | ||
|
||
BloomFilter::print("=========================="); | ||
|
||
for(auto x: ha) | ||
{ | ||
BloomFilter::print(x, "=>", bl.test(x)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
add_library(murmur3 MurmurHash3.h MurmurHash3.cpp) |
Oops, something went wrong.