-
Notifications
You must be signed in to change notification settings - Fork 28
/
Repeated_DNA_Sequences.cpp
78 lines (72 loc) · 2.01 KB
/
Repeated_DNA_Sequences.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class Solution {
public:
vector<string> findRepeatedDnaSequences(string s) {
vector<string> result;
if(s.length() < 10) {
return result;
}
unordered_map <int, int> Map;
string substr = s.substr(0, 10);
int hash = stringHash(substr);
Map[hash]++;
for(int i = 10; i < s.length(); ++i) {
int newHash = updateHash(hash, s[i]);
Map[newHash]++;
hash = newHash;
}
for(auto it = Map.begin(); it != Map.end(); ++it) {
if(it->second > 1) {
string str = hashToStr(it->first);
result.push_back(str);
}
}
return result;
}
int stringHash(string &s) {
int out = 0;
for(int i = 0; i < 10; ++i) {
char c = s[i];
if(c == 'C') {
out |= 1;
} else if(c == 'G') {
out |= 2;
} else if(c == 'T') {
out |= 3;
}
out <<= 2;
}
out >>= 2;
return out;
}
int updateHash(int oldHash, char newChar) {
int mask = ~(3 << 18);
oldHash &= mask;
oldHash <<= 2;
if(newChar == 'C') {
oldHash |= 1;
} else if(newChar == 'G') {
oldHash |= 2;
} else if(newChar == 'T') {
oldHash |= 3;
}
return oldHash;
}
string hashToStr(int hash) {
int mask = 3;
string out = "";
for(int i = 0; i < 10; ++i) {
int code = hash & mask;
if(code == 0) {
out = 'A' + out;
} else if(code == 1) {
out = 'C' + out;
} else if(code == 2) {
out = 'G' + out;
} else if(code == 3) {
out = 'T' + out;
}
hash >>= 2;
}
return out;
}
};