// desa_main.cpp
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <utility>
// using TCLAP for command line parsing
#include <tclap/CmdLine.h>
// mxx dependencies
#include <mxx/env.hpp>
#include <mxx/comm.hpp>
#include <mxx/file.hpp>
#include <mxx/utils.hpp>
#include <suffix_array.hpp>
#include <lcp.hpp>
#include <divsufsort_wrapper.hpp>
#include "seq_query.hpp"
#include "desa.hpp"
#include "tldt.hpp"
// define index and TLI types for experiment
//
// index_t is the integer type handed to both the TLI and the DESA templates
// below as their index parameter.
using index_t = uint64_t;
// TLI_t selects the TLI implementation that is plugged into dist_desa.
// NOTE(review): "TLI" presumably stands for "top-level index" -- confirm
// against tldt.hpp / desa.hpp.
using TLI_t = tldt<index_t>;
// Alternative TLI implementation, currently disabled; swap the two `using`
// lines to run the experiment with tllt instead of tldt.
//using TLI_t = tllt<index_t>;
// desa_t is the concrete distributed index type instantiated in main().
using desa_t = dist_desa<index_t, TLI_t>;
int main(int argc, char *argv[]) {
// set up mxx / MPI
mxx::env e(argc, argv);
mxx::env::set_exception_on_error();
mxx::comm c;
mxx::print_node_distribution(c);
// given a file, compute suffix array and lcp array, how to do count/locate query?
// via sa_range()
/*
if (argc < 3) {
std::cerr << "Usage: ./xx <text_file> <pattern_file>" << std::endl;
}
*/
try {
// define commandline usage
TCLAP::CmdLine cmd("Distributed Enhanced Suffix Array");
TCLAP::ValueArg<std::string> fileArg("f", "file", "Input string filename.", true, "", "filename");
cmd.add(fileArg);
TCLAP::ValueArg<std::string> loadArg("l", "load", "Load index from given basename", false, "", "filename");
TCLAP::SwitchArg constructArg("c", "construct", "Construct SA/LCP/Lc from input file", false);
cmd.xorAdd(loadArg, constructArg); // either load or construct SA/LCP
TCLAP::ValueArg<std::string> outArg("o", "outfile", "Output file base name. If --construct was used, this stores the resulting DESA.", false, "", "filename");
cmd.add(outArg);
TCLAP::ValueArg<std::string> queryArg("q", "query", "Query file for benchmarking querying.", false, "", "filename");
cmd.add(queryArg);
cmd.parse(argc, argv);
mxx::section_timer t;
// create distributed DESA class
using range_t = desa_t::range_t;
desa_t idx(c);
if (constructArg.getValue()) {
if (c.rank() == 0) {
std::cout << "constructing DESA (SA+LCP+LC)..." << std::endl;
}
// read input file into in-memory string
std::string input_str = mxx::file_block_decompose(fileArg.getValue().c_str(), c);
t.end_section("read input file");
// construct DESA from scratch
idx.construct(input_str.begin(), input_str.end(), c);
t.end_section("construct idx");
if (outArg.getValue() != "") {
// store DESA to given basename
if (c.rank() == 0) {
std::cout << "saving DESA to basename `" << outArg.getValue() << "` ..." << std::endl;
}
idx.write(outArg.getValue(), c);
}
} else {
if (outArg.getValue() != "") {
if (c.rank() == 0) {
std::cerr << "WARNING: --outfile argument will be ignored since the input is loaded from file (don't use in conjuction with --load)." << std::endl;
}
}
if (c.rank() == 0) {
std::cout << "loading DESA (SA+LCP+LC) from basename `" << loadArg.getValue() << "` ..." << std::endl;
}
idx.read(fileArg.getValue(), loadArg.getValue(), c);
}
// query benchmarking?
if (queryArg.getValue() != "") {
strings ss = strings::from_dfile(queryArg.getValue(), c);
t.end_section("read patterns file");
// run locate a couple of times
int reps = 10;
for (int i = 0; i < reps; ++i) {
std::vector<range_t> mysols = idx.bulk_locate(ss);
t.end_section("bulk_locate");
}
}
} catch (TCLAP::ArgException& e) {
std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl;
exit(EXIT_FAILURE);
}
return 0;
}