-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLZ.hpp
51 lines (46 loc) · 1.48 KB
/
LZ.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#pragma once
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

#include <Eigen/Dense>
using ArrayXL = Eigen::Array<int64_t, Eigen::Dynamic, 1>;
// Lempel-Ziv complexity of an integer symbol sequence.
struct LZ {
static size_t calc(const ArrayXL &seq) {
std::vector<ArrayXL> dictionary;
size_t wordStart=0;
size_t wordEnd=1;
while (wordEnd <= seq.size()) {
auto word = seq.segment(wordStart, wordEnd - wordStart);
// //is it in the dictionary
auto findResult = std::find_if(dictionary.begin(), dictionary.end(), [word] (const ArrayXL& s) {
bool eq = false;
if (s.size() == word.size()) {
eq=true;
for(size_t i=0; i < s.size(); i++) {
if (word[i] != s[i]) {
eq = false;
break;
}
}
}
return eq;
});
if (findResult == dictionary.end()) {
dictionary.push_back(word);
wordStart = wordEnd;
wordEnd = wordStart+1;
}
else{
wordEnd++;
}
}
return dictionary.size();
}
/*Normalised
see Zhang, Y., Hao, J., Zhou, C., & Chang, K. (2009). Normalized Lempel-Ziv complexity and its application in bio-sequence analysis. Journal of Mathematical Chemistry, 46(4), 1203–1212. https://doi.org/10.1007/s10910-008-9512-2
*/
static double calcNorm(const ArrayXL &seq) {
const auto n = seq.size();
return LZ::calc(seq) / (n / log(n));
}
};