Skip to content

Commit

Permalink
😊
Browse files Browse the repository at this point in the history
  • Loading branch information
abbycin committed Feb 5, 2017
1 parent 79160bd commit b5ff255
Show file tree
Hide file tree
Showing 12 changed files with 901 additions and 0 deletions.
13 changes: 13 additions & 0 deletions file_sort/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Test (sort numbers in a big file)

### bit.cpp

This file uses `std::bitset` to solve the problem; it is efficient for a small range of values (e.g., numbers no larger than 1M).

### bs.cpp

This file works in a `divide-and-conquer` fashion: it splits a big file into many small files and sorts each of them individually. It then walks through all of the small files, reading one number from each, finds the smallest one (or the biggest one) and writes it to a new file, so that all numbers in the new file end up sorted.

### rand.cpp

This file generates random numbers and writes them to a file (some numbers in the file will be duplicates).
84 changes: 84 additions & 0 deletions file_sort/bit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*********************************************************
File Name:bit.cpp
Author: Abby Cin
Mail: [email protected]
Created Time: Sat 01 Oct 2016 09:58:50 AM CST
**********************************************************/

#include <iostream>
#include <chrono>
#include <bitset>

/**
 * Sort the integers stored one per line in argv[1] and write them, in
 * ascending order with duplicates preserved, to argv[2].
 *
 * Only values in [0, size] are handled; anything outside that range
 * (including negative numbers) is silently skipped. The input is scanned
 * once per half of the value range so that the presence bitmap and the
 * duplicate counters only need to cover `max_scan` values at a time.
 *
 * Returns 1 on a usage error or when a file cannot be opened, 0 otherwise.
 * The elapsed wall-clock time is printed to stdout in milliseconds.
 */
int main(int argc, char* argv[])
{
    if(argc != 3)
    {
        fprintf(stderr, "%s input output\n", argv[0]);
        return 1;
    }
    auto beg = std::chrono::steady_clock::now();
    constexpr int size = 1'000'000;
    constexpr int max_scan = size / 2 + 1;
    // Static storage: the counters alone take ~2 MB, which is too much to
    // put on the stack safely. Static objects start out zero-initialized.
    static std::bitset<max_scan> bit_map;
    static int duplicate[max_scan];

    FILE* fin = fopen(argv[1], "r");
    if(fin == nullptr)
    {
        fprintf(stderr, "cannot open %s for reading\n", argv[1]);
        return 1;
    }
    FILE* fout = fopen(argv[2], "w");
    if(fout == nullptr)
    {
        fprintf(stderr, "cannot open %s for writing\n", argv[2]);
        fclose(fin);
        return 1;
    }

    // One pass per window of max_scan values: [0, max_scan) first, then
    // [max_scan, size]. `base` is the smallest value of the current window.
    for(int base = 0; base <= size; base += max_scan)
    {
        bit_map.reset();
        for(int i = 0; i < max_scan; ++i)
            duplicate[i] = 0;
        fseek(fin, 0, SEEK_SET);

        int n;
        // "== 1" rather than "!= EOF": a non-numeric token makes fscanf
        // return 0 without consuming input, which would loop forever.
        while(fscanf(fin, "%d", &n) == 1)
        {
            // Skip values outside the current window; this also rejects
            // negative numbers, which would make bitset::test() throw.
            if(n < base || n > size || n - base >= max_scan)
                continue;
            const int slot = n - base;
            if(bit_map.test(slot))
                duplicate[slot] += 1;
            else
                bit_map.set(slot, true);
        }

        for(int i = 0; i < max_scan; ++i)
        {
            if(!bit_map[i])
                continue;
            // One line for the first occurrence plus one per duplicate.
            for(int copies = 0; copies <= duplicate[i]; ++copies)
                fprintf(fout, "%d\n", i + base);
        }
    }

    fclose(fin);
    fclose(fout);
    auto end = std::chrono::steady_clock::now();
    auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg);
    std::cout << dur.count() << "ms\n";
}
159 changes: 159 additions & 0 deletions file_sort/bs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*********************************************************
File Name:bs.cpp
Author: Abby Cin
Mail: [email protected]
Created Time: Tue 27 Sep 2016 04:30:51 PM CST
**********************************************************/

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <vector>

// Fatal-error helper: writes "<func> : <line>" followed by a tab to stderr,
// lets perror(func) append the description of the current errno value
// (prefixed with func), and then calls std::terminate().
// The body is wrapped in do { ... } while(0) so that the macro expands to a
// single statement and can be used safely after an unbraced `if`.
#define failure(func, line) \
do \
{ \
fprintf(stderr, "%s : %d\t", func, line); \
perror(func); \
std::terminate(); \
} while(0)

/**
 * External merge sort for a text file holding one integer per line.
 *
 * The input is cut into runs of at most `limit` numbers; each run is sorted
 * in memory and written to a temporary file named "tmp_<index>" in the
 * current working directory. The runs are then merged into the output file
 * and the temporary files are deleted.
 *
 * Any I/O failure prints a diagnostic to stderr and calls std::terminate().
 */
class Bigdata
{
public:
    /**
     * @param limit  maximum number of integers kept in memory per run
     *               (must be positive)
     * @param input  path of the file to sort
     * @param output path of the file receiving the sorted numbers
     */
    Bigdata(long limit, const std::string& input, const std::string& output)
        : slot_(limit), in_(input), out_(output), count_(0)
    {
    }

    /// Sort `input` into `output` in ascending order, keeping duplicates.
    void sort()
    {
        split_files();
        merge_files();
        do_clean();
    }

private:
    const long slot_;       // numbers per run
    std::string in_, out_;  // input / output paths
    long count_;            // number of run files created so far

    // Print "<func> : <line>" and the errno description, then terminate.
    [[noreturn]] static void die(const char* func, int line)
    {
        fprintf(stderr, "%s : %d\t", func, line);
        perror(func);
        std::terminate();
    }

    // Name of the temporary file holding run number `index`.
    static std::string tmp_name(long index)
    {
        return "tmp_" + std::to_string(index);
    }

    // Sort `run`, write it to a new temporary file and empty it.
    // Does nothing for an empty run, so no empty run file is ever created.
    void write_run(std::vector<long>& run)
    {
        if(run.empty())
            return;
        std::sort(run.begin(), run.end());
        FILE* tmpfd = fopen(tmp_name(count_).c_str(), "w");
        if(tmpfd == nullptr)
            die(__func__, __LINE__);
        ++count_;
        // Only the numbers actually read are written; a short final run
        // must not be padded with leftover values.
        for(long value : run)
            fprintf(tmpfd, "%ld\n", value);
        if(fclose(tmpfd) != 0)
            die(__func__, __LINE__);
        run.clear();
    }

    // Cut the input into sorted runs of at most slot_ numbers each.
    void split_files()
    {
        if(slot_ < 1)
        {
            fprintf(stderr, "numbers per file must be positive, got %ld\n", slot_);
            std::terminate();
        }
        FILE* fd = fopen(in_.c_str(), "r");
        if(fd == nullptr)
            die(__func__, __LINE__);
        std::vector<long> run;
        run.reserve(static_cast<std::size_t>(slot_));
        long n = 0;
        // "== 1" rather than "!= EOF": a non-numeric token makes fscanf
        // return 0 without consuming input, which would loop forever.
        while(fscanf(fd, "%ld", &n) == 1)
        {
            run.push_back(n);
            if(static_cast<long>(run.size()) == slot_)
                write_run(run);
        }
        write_run(run); // trailing, possibly shorter, run
        fclose(fd);
    }

    // Load the next number of run `index` into `value`. When the run is
    // exhausted, close and delete its file and mark it finished (nullptr).
    static void read_next(FILE*& run, long& value, long index)
    {
        if(fscanf(run, "%ld", &value) == 1)
            return;
        fclose(run);
        run = nullptr;
        remove(tmp_name(index).c_str());
    }

    // Merge all run files into the output file.
    void merge_files()
    {
        FILE* fd = fopen(out_.c_str(), "w");
        if(fd == nullptr)
            die(__func__, __LINE__);

        const std::size_t runs = static_cast<std::size_t>(count_);
        std::vector<FILE*> fds(runs, nullptr); // nullptr == run exhausted
        std::vector<long> head(runs, 0);       // smallest unread value per run
        for(std::size_t i = 0; i < runs; ++i)
        {
            fds[i] = fopen(tmp_name(static_cast<long>(i)).c_str(), "r");
            if(fds[i] == nullptr)
                die(__func__, __LINE__);
            read_next(fds[i], head[i], static_cast<long>(i));
        }

        while(true)
        {
            // Pick the live run whose head is smallest (first one on ties).
            std::size_t best = runs;
            for(std::size_t i = 0; i < runs; ++i)
            {
                if(fds[i] != nullptr && (best == runs || head[i] < head[best]))
                    best = i;
            }
            if(best == runs)
                break; // every run is exhausted
            fprintf(fd, "%ld\n", head[best]);
            read_next(fds[best], head[best], static_cast<long>(best));
        }
        if(fclose(fd) != 0)
            die(__func__, __LINE__);
    }

    // Delete any run file still present and reset the run counter so that
    // sort() can be called again on the same object.
    void do_clean()
    {
        for(long i = 0; i < count_; ++i)
            remove(tmp_name(i).c_str());
        count_ = 0;
    }
};

/**
 * Command-line driver: `bs num_per_file input output`.
 *
 * Sorts the integers of `input` into `output` with Bigdata, keeping at most
 * `num_per_file` numbers in memory at a time, and prints the elapsed
 * wall-clock time in milliseconds to stdout.
 *
 * Returns 1 on a usage error or when `num_per_file` is not a positive
 * integer, 0 otherwise.
 */
int main(int argc, char* argv[])
{
    if(argc != 4)
    {
        fprintf(stderr, "%s num_per_file input output\n", argv[0]);
        return 1;
    }

    // std::stol throws on non-numeric or out-of-range text; treat that the
    // same way as a non-positive value instead of aborting the process.
    long per_file = 0;
    try
    {
        per_file = std::stol(argv[1]);
    }
    catch(const std::exception&)
    {
        per_file = 0;
    }
    if(per_file < 1)
    {
        fprintf(stderr, "num_per_file must be a positive integer, got '%s'\n", argv[1]);
        return 1;
    }

    auto beg = std::chrono::steady_clock::now();
    Bigdata bigdata(per_file, argv[2], argv[3]);
    bigdata.sort();
    auto end = std::chrono::steady_clock::now();
    auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg);
    std::cout << dur.count() << "ms\n";
}
28 changes: 28 additions & 0 deletions file_sort/rand.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*********************************************************
File Name:rand.cpp
Author: Abby Cin
Mail: [email protected]
Created Time: Sat 24 Sep 2016 03:39:01 PM CST
**********************************************************/

#include <iostream>
#include <random>

/**
 * Command-line driver: `rand num output`.
 *
 * Writes `num` random integers, one per line, to `output`. Each value is
 * drawn uniformly from [1, num], so duplicates are possible.
 *
 * Returns 1 on a usage error, when `num` is not positive, or when the
 * output file cannot be opened; 0 otherwise. A `num` argument that is not
 * a number still makes std::stol throw, as before.
 */
int main(int argc, char* argv[])
{
    if(argc != 3)
    {
        fprintf(stderr, "%s num output\n", argv[0]);
        return 1;
    }

    const long total = std::stol(argv[1]);
    if(total < 1)
    {
        // uniform_int_distribution requires lower bound <= upper bound.
        fprintf(stderr, "num must be a positive integer\n");
        return 1;
    }

    FILE* fp = fopen(argv[2], "w");
    if(fp == nullptr)
    {
        fprintf(stderr, "cannot open %s for writing\n", argv[2]);
        return 1;
    }

    std::mt19937 rng(std::random_device{}());
    // Draw `long` values so that the "%ld" conversion below matches the
    // argument type exactly; the engine's result_type is an unsigned type
    // that need not be `int`, so printing it with "%d" is not portable.
    std::uniform_int_distribution<long> dist(1, total);
    for(long left = total; left > 0; --left)
    {
        fprintf(fp, "%ld\n", dist(rng));
    }
    fclose(fp);
}
65 changes: 65 additions & 0 deletions file_sort/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash

# Build every *.cpp file in the current directory into an executable named
# after the source file without its .cpp suffix.
# Returns 1 as soon as one compilation fails.
compile()
{
    local src
    # Glob directly instead of parsing `ls`, and quote every expansion so
    # file names containing spaces or glob characters are handled correctly.
    for src in *.cpp
    do
        # With no matching file the pattern stays literal; skip it.
        [ -e "$src" ] || continue
        # ${src%.cpp} strips only a trailing ".cpp"; the old sed pattern
        # 's/.cpp//' treated "." as "any character" and was not anchored.
        g++ -std=c++14 "$src" -o "${src%.cpp}" -O3 -s || return 1
    done
}

# Generate the input file `data` holding 1000000 random numbers.
mkdata()
{
    local count=1000000
    ./rand "$count" data
}

# Remove the compiled binaries and every data/result file of the tests.
clean()
{
    local artifacts=(rand bs bit data bs_out bit_out)
    echo -e "\e[32mcleaning...\e[0m"
    rm -f "${artifacts[@]}"
    echo -e "\e[32mexit.\e[0m"
}

# Run the bitset-based sorter on `data`, writing the result to `bit_out`.
run_bi()
{
    # The bitset's template argument must be determined at compile time,
    # so the value range is fixed inside the binary and not passed here.
    local input=data output=bit_out
    echo "bit test is running..."
    ./bit "$input" "$output"
    echo "done!"
}

# Run the split-and-merge sorter on `data` with 500000 numbers per chunk,
# writing the result to `bs_out`.
run_bs()
{
    local per_file=500000
    echo "bs test is running..."
    ./bs "$per_file" data bs_out
    echo "done!"
}

# Regenerate `data` with 100000000 numbers and sort it with the
# split-and-merge sorter, using 2000000 numbers per chunk.
big_file_test()
{
    local total=100000000 per_file=2000000
    echo "bs test is running on big file..."
    rm -f data bs_out
    ./rand "$total" data
    ./bs "$per_file" data bs_out
    echo "done!"
}

# Entry point: build the tools, run both sorters on the small data set,
# then the split-and-merge sorter on the big one, printing disk usage
# after each stage, and finally remove everything that was generated.
main()
{
    local rule="--------------------"

    compile
    mkdata
    run_bi
    run_bs

    echo "$rule"
    du -h *
    echo "$rule"

    echo "now test big file sort..."
    big_file_test

    echo "$rule"
    du -h *
    echo "$rule"

    clean
}

main
11 changes: 11 additions & 0 deletions signal/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Signal
A simple callback wrapper for pointers to class member functions and pointers to functions

# How it works
- Variable template arguments
- Default template arguments
- Traits
- Mutex

# Requirement
A compiler with support for C++11
Loading

0 comments on commit b5ff255

Please sign in to comment.