forked from TheAlgorithms/Rust
-
Notifications
You must be signed in to change notification settings - Fork 0
/
suffix_array.rs
95 lines (88 loc) · 2.56 KB
/
suffix_array.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// In computer science, a suffix array is a sorted array of all suffixes of a string.
// It is a data structure used in, among others, full-text indices, data-compression algorithms,
// and the field of bibliometrics. Source: https://en.wikipedia.org/wiki/Suffix_array
use std::cmp::Ordering;
#[derive(Clone)]
struct Suffix {
index: usize,
rank: (i32, i32),
}
impl Suffix {
fn cmp(&self, b: &Self) -> Ordering {
let a = self;
let ((a1, a2), (b1, b2)) = (a.rank, b.rank);
match a1.cmp(&b1) {
Ordering::Equal => {
if a2 < b2 {
Ordering::Less
} else {
Ordering::Greater
}
}
o => o,
}
}
}
pub fn generate_suffix_array(txt: &str) -> Vec<usize> {
let n = txt.len();
let mut suffixes: Vec<Suffix> = vec![
Suffix {
index: 0,
rank: (-1, -1)
};
n
];
for (i, suf) in suffixes.iter_mut().enumerate() {
suf.index = i;
suf.rank.0 = (txt.chars().nth(i).expect("this should exist") as u32 - 'a' as u32) as i32;
suf.rank.1 = if (i + 1) < n {
(txt.chars().nth(i + 1).expect("this should exist") as u32 - 'a' as u32) as i32
} else {
-1
}
}
suffixes.sort_by(|a, b| a.cmp(b));
let mut ind = vec![0; n];
let mut k = 4;
while k < 2 * n {
let mut rank = 0;
let mut prev_rank = suffixes[0].rank.0;
suffixes[0].rank.0 = rank;
ind[suffixes[0].index] = 0;
for i in 1..n {
if suffixes[i].rank.0 == prev_rank && suffixes[i].rank.1 == suffixes[i - 1].rank.1 {
prev_rank = suffixes[i].rank.0;
suffixes[i].rank.0 = rank;
} else {
prev_rank = suffixes[i].rank.0;
rank += 1;
suffixes[i].rank.0 = rank;
}
ind[suffixes[i].index] = i;
}
for i in 0..n {
let next_index = suffixes[i].index + (k / 2);
suffixes[i].rank.1 = if next_index < n {
suffixes[ind[next_index]].rank.0
} else {
-1
}
}
suffixes.sort_by(|a, b| a.cmp(b));
k *= 2;
}
let mut suffix_arr = Vec::new();
for suf in suffixes {
suffix_arr.push(suf.index);
}
suffix_arr
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_suffix_array() {
let a = generate_suffix_array("banana");
assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
}
}