-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
901 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Test (sort numbers in a big file) | ||
|
||
### bit.cpp | ||
|
||
This file uses `std::bitset` to solve the problem; it is efficient for small-scale inputs (e.g., no more than 1M numbers). | ||
|
||
### bs.cpp | ||
|
||
This file works in a `divide-and-conquer` fashion: it splits a big file into many small files and sorts each one individually; it then walks through all of the small files, reading one number from each, repeatedly picks the smallest one (or the biggest one), and writes it to a new file, so that all numbers in the new file end up sorted. | ||
|
||
### rand.cpp | ||
|
||
This file generates random numbers and writes them to a file (some numbers in the file may be duplicated). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/********************************************************* | ||
File Name:bit.cpp | ||
Author: Abby Cin | ||
Mail: [email protected] | ||
Created Time: Sat 01 Oct 2016 09:58:50 AM CST | ||
**********************************************************/ | ||
|
||
#include <iostream> | ||
#include <chrono> | ||
#include <bitset> | ||
|
||
/**
 * bit.cpp entry point: sort the integers of a text file using a bitmap.
 *
 * Usage: bit <input> <output>
 *
 * The input holds whitespace-separated integers (one per line in practice).
 * Only values in [0, 1'000'000] are sorted; anything outside that range,
 * including negative numbers, is skipped. The range is handled in two halves
 * so the bitmap and the duplicate counters only have to cover max_scan values
 * at a time: the input is scanned once per half and the matching values are
 * appended to the output in ascending order, duplicates included.
 *
 * Reading stops at the first token that is not an integer.
 *
 * @return 0 on success, 1 on a usage error or when a file cannot be opened.
 *         The elapsed time in milliseconds is printed to stdout.
 */
int main(int argc, char* argv[])
{
  if(argc != 3)
  {
    fprintf(stderr, "%s input output\n", argv[0]);
    return 1;
  }
  const auto beg = std::chrono::steady_clock::now();
  const int size = 1'000'000;
  const int max_scan = size / 2 + 1;
  std::bitset<max_scan> bit_map;
  // Static storage: roughly 2 MB of counters is too large to put on the
  // stack safely on every platform.
  static int duplicate[max_scan];

  FILE* fin = fopen(argv[1], "r");
  if(fin == nullptr)
  {
    perror(argv[1]);
    return 1;
  }
  FILE* fout = fopen(argv[2], "w");
  if(fout == nullptr)
  {
    perror(argv[2]);
    fclose(fin);
    return 1;
  }

  // One pass per half of the value range: base is 0, then max_scan.
  for(int base = 0; base <= size; base += max_scan)
  {
    fseek(fin, 0, SEEK_SET);
    bit_map.reset();
    for(int i = 0; i < max_scan; ++i)
      duplicate[i] = 0;

    int n = 0;
    // "== 1" (not "!= EOF") so a malformed token ends the scan instead of
    // spinning forever on input that fscanf can never consume.
    while(fscanf(fin, "%d", &n) == 1)
    {
      // Reject values below this half first, so n - base cannot overflow and
      // negative numbers never reach bitset::test (which would throw).
      if(n < base || n > size || n - base >= max_scan)
        continue;
      const int idx = n - base;
      if(bit_map.test(idx))
        duplicate[idx] += 1;
      else
        bit_map.set(idx);
    }

    for(int i = 0; i < max_scan; ++i)
    {
      if(!bit_map[i])
        continue;
      // One line for the first occurrence plus one per recorded duplicate.
      for(int j = 0; j <= duplicate[i]; ++j)
        fprintf(fout, "%d\n", i + base);
    }
  }

  fclose(fin);
  fclose(fout);
  const auto end = std::chrono::steady_clock::now();
  const auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg);
  std::cout << dur.count() << "ms\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
/********************************************************* | ||
File Name:bs.cpp | ||
Author: Abby Cin | ||
Mail: [email protected] | ||
Created Time: Tue 27 Sep 2016 04:30:51 PM CST | ||
**********************************************************/ | ||
|
||
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
|
||
// Report a failed C library call (function name, line number and the errno
// text from perror) on stderr, then abort the program.
#define failure(func, line) \
  do \
  { \
    fprintf(stderr, "%s : %d\t", func, line); \
    perror(func); \
    std::terminate(); \
  } while(0)

/**
 * External merge sort for a text file of whitespace-separated integers.
 *
 * The input is read in chunks of at most `limit` numbers. Each chunk is
 * sorted in memory and written to a temporary file named tmp_<index> in the
 * current working directory. The temporary files are then merged into the
 * output file (one number per line, ascending) and removed.
 *
 * Any failure to open a file aborts the program through the failure macro.
 */
class Bigdata
{
public:
  /**
   * @param limit  maximum count of numbers held in memory / stored per
   *               temporary file; values below 1 are treated as 1
   * @param input  path of the file to sort
   * @param output path of the sorted file to create
   */
  Bigdata(long limit, const std::string& input, const std::string& output)
    : slot_(limit > 0 ? limit : 1), in_(input), out_(output), count_(0)
  {
  }

  /// Run the whole pipeline: split into sorted runs, merge, clean up.
  void sort()
  {
    split_files();
    merge_files();
    do_clean();
  }

private:
  const long slot_;      // numbers per temporary file
  std::string in_, out_; // input and output paths
  long count_;           // number of temporary files created so far

  /// Name of the temporary file with the given index.
  static std::string tmp_name(long index)
  {
    return "tmp_" + std::to_string(index);
  }

  /// Create the next temporary file for writing and bump the file counter.
  FILE* get_tmpfd()
  {
    FILE* tmpfd = fopen(tmp_name(count_++).c_str(), "w");
    if(tmpfd == nullptr)
      failure(__func__, __LINE__);
    return tmpfd;
  }

  /// Sort the buffered numbers, store them as a new temporary file and
  /// empty the buffer. Only the numbers actually buffered are written.
  void write_run(std::vector<long>& numbers)
  {
    std::sort(numbers.begin(), numbers.end());
    FILE* tmpfd = get_tmpfd();
    for(long value : numbers)
      fprintf(tmpfd, "%ld\n", value);
    fclose(tmpfd);
    numbers.clear();
  }

  /// Read the input and emit one sorted temporary file per chunk of slot_
  /// numbers; the last chunk may be shorter. An empty input creates no files.
  void split_files()
  {
    FILE* fd = fopen(in_.c_str(), "r");
    if(fd == nullptr)
      failure(__func__, __LINE__);
    std::vector<long> numbers;
    numbers.reserve(static_cast<std::size_t>(slot_));
    long n = 0;
    // "== 1" so a malformed token ends the scan instead of looping forever.
    while(fscanf(fd, "%ld", &n) == 1)
    {
      numbers.push_back(n);
      if(static_cast<long>(numbers.size()) == slot_)
        write_run(numbers);
    }
    if(!numbers.empty())
      write_run(numbers);
    fclose(fd);
  }

  /// Load the next number of run `i` into data[i]; when the run is
  /// exhausted, close and delete its file and mark it done (fds[i] = nullptr).
  static void load_next(std::vector<FILE*>& fds, std::vector<long>& data, long i)
  {
    if(fscanf(fds[i], "%ld", &data[i]) != 1)
    {
      fclose(fds[i]);
      fds[i] = nullptr;
      remove(tmp_name(i).c_str());
    }
  }

  /// Merge all temporary files into the output file by repeatedly writing
  /// the smallest head value among the runs that still have data.
  void merge_files()
  {
    FILE* fd = fopen(out_.c_str(), "w");
    if(fd == nullptr)
      failure(__func__, __LINE__);
    std::vector<FILE*> fds(static_cast<std::size_t>(count_)); // nullptr == done
    std::vector<long> data(static_cast<std::size_t>(count_), 0);
    for(long i = 0; i < count_; ++i)
    {
      fds[i] = fopen(tmp_name(i).c_str(), "r");
      if(fds[i] == nullptr)
        failure(__func__, __LINE__);
      load_next(fds, data, i);
    }
    while(true)
    {
      // Pick the first run holding the minimum head value.
      long j = -1;
      for(long i = 0; i < count_; ++i)
      {
        if(fds[i] != nullptr && (j < 0 || data[i] < data[j]))
          j = i;
      }
      if(j < 0)
        break; // every run is exhausted
      fprintf(fd, "%ld\n", data[j]);
      load_next(fds, data, j);
    }
    fclose(fd);
  }

  /// Best-effort removal of every temporary file; the result of remove() is
  /// deliberately ignored because merge_files deletes exhausted runs itself.
  void do_clean()
  {
    for(long i = 0; i < count_; ++i)
      remove(tmp_name(i).c_str());
  }
};
|
||
int main(int argc, char* argv[]) | ||
{ | ||
if(argc != 4) | ||
{ | ||
fprintf(stderr, "%s num_per_file input output\n", argv[0]); | ||
return 1; | ||
} | ||
auto beg = std::chrono::steady_clock::now(); | ||
Bigdata bigdata(std::stol(argv[1]), argv[2], argv[3]); | ||
bigdata.sort(); | ||
auto end = std::chrono::steady_clock::now(); | ||
auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg); | ||
std::cout << dur.count() << "ms\n"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/********************************************************* | ||
File Name:rand.cpp | ||
Author: Abby Cin | ||
Mail: [email protected] | ||
Created Time: Sat 24 Sep 2016 03:39:01 PM CST | ||
**********************************************************/ | ||
|
||
#include <iostream> | ||
#include <random> | ||
|
||
/**
 * rand.cpp entry point: write random integers to a file, one per line.
 *
 * Usage: rand <num> <output>
 *
 * Writes num values drawn uniformly from [1, num]; duplicates are possible.
 * num must be a positive integer.
 *
 * @return 0 on success, 1 on a usage error or when the output file cannot be
 *         opened.
 */
int main(int argc, char* argv[])
{
  if(argc != 3)
  {
    fprintf(stderr, "%s num output\n", argv[0]);
    return 1;
  }
  auto n = std::stol(argv[1]);
  // uniform_int_distribution(a, b) requires a <= b, so num must be >= 1.
  if(n < 1)
  {
    fprintf(stderr, "%s: num must be a positive integer\n", argv[0]);
    return 1;
  }
  FILE* fp = fopen(argv[2], "w");
  if(fp == nullptr)
  {
    perror(argv[2]);
    return 1;
  }
  std::mt19937 rng;
  rng.seed(std::random_device()());
  std::uniform_int_distribution<std::mt19937::result_type> dist(
    1, static_cast<std::mt19937::result_type>(n));
  for(; n > 0; --n)
  {
    // result_type is an unsigned type that may be wider than int, so print
    // it through an explicit unsigned long rather than "%d".
    fprintf(fp, "%lu\n", static_cast<unsigned long>(dist(rng)));
  }
  fclose(fp);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/bash | ||
|
||
compile()
{
  # Build every C++ source in the current directory into an executable named
  # after the file without its .cpp suffix (e.g. bit.cpp -> bit).
  local src
  for src in *.cpp
  do
    # An unmatched glob stays literal; skip it rather than passing "*.cpp" to g++.
    [ -e "$src" ] || continue
    # ${src%.cpp} strips only a trailing ".cpp", unlike an unanchored sed pattern.
    g++ -std=c++14 "$src" -o "${src%.cpp}" -O3 -s
  done
}
|
||
mkdata()
{
  # Generate the default data set: one million random numbers in ./data.
  local count=1000000
  ./rand "$count" data
}
|
||
clean()
{
  # Remove the compiled binaries and every generated data/result file.
  local artifacts=(rand bs bit data bs_out bit_out)
  echo -e "\e[32mcleaning...\e[0m"
  rm -f "${artifacts[@]}"
  echo -e "\e[32mexit.\e[0m"
}
|
||
run_bi()
{
  # Run the bitset-based sorter on ./data and store the result in ./bit_out.
  # bitset's template argument must be determined at compile time
  local input=data
  local output=bit_out
  echo "bit test is running..."
  ./bit "$input" "$output"
  echo "done!"
}
|
||
run_bs()
{
  # Run the external merge sorter on ./data with 500000 numbers per temp file.
  local per_file=500000
  echo "bs test is running..."
  ./bs "$per_file" data bs_out
  echo "done!"
}
|
||
big_file_test()
{
  # Regenerate ./data with 100 million numbers and sort it with ./bs using
  # 2 million numbers per temporary file.
  local total=100000000
  local per_file=2000000
  echo "bs test is running on big file..."
  rm -f data bs_out
  ./rand "$total" data
  ./bs "$per_file" data bs_out
  echo "done!"
}
|
||
main()
{
  # Full run: build, generate data, run both sorters, report disk usage,
  # repeat the merge sort on a big file, report again, then clean up.
  local rule="--------------------"

  compile
  mkdata
  run_bi
  run_bs

  echo "$rule"
  du -h *
  echo "$rule"

  echo "now test big file sort..."
  big_file_test

  echo "$rule"
  du -h *
  echo "$rule"

  clean
}

main
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Signal | ||
A simple callback wrapper for pointers to class member functions and pointers to functions | ||
|
||
# How it works | ||
- Variable template arguments | ||
- Default template arguments | ||
- Traits | ||
- Mutex | ||
|
||
# Requirement | ||
A compiler with support for C++11 |
Oops, something went wrong.