forked from apertium/apertium-lex-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlrx_processor.h
143 lines (119 loc) · 3.61 KB
/
lrx_processor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
* Copyright (C) 2011--2012 Universitat d'Alacant
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __LRX_PROCESSOR_H__
#define __LRX_PROCESSOR_H__
#include <cwchar>
#include <cstdio>
#include <libgen.h>
#include <cerrno>
#include <string>
#include <iostream>
#include <cmath>
#include <sstream>
#include <limits>
#include <cstdlib>
#include <list>
#include <algorithm>
#include <set>
#include <libxml/xmlreader.h>
#include <lttoolbox/ltstr.h>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/transducer.h>
#include <lttoolbox/xml_parse_util.h>
#include <lttoolbox/alphabet.h>
#include <lttoolbox/exception.h>
#include <lttoolbox/compression.h>
#include <lttoolbox/regexp_compiler.h>
#include <lttoolbox/state.h>
#include <lttoolbox/match_exe.h>
#include <lttoolbox/trans_exe.h>
#include <lttoolbox/my_stdio.h>
using namespace std;
/*
class BiltransToken {
public:
bool isEOF = false;
wstring source;
wstring blanks;
vector<wstring> target;
wstring toString(bool delim) {
wstring out = source;
for(int i = 0; i < target.size(); i++) {
out += L'/' + target[i];
}
if (delim && (source.size() > 0 || target.size() > 0)) {
out = blanks + L'^' + out + L'$';
} else {
out = blanks + out;
}
return out;
}
};
*/
class LRXProcessor
{
private:
Alphabet alphabet;
TransExe transducer;
map<wstring, TransExe> recognisers;
map<wstring, double> weights;
// map<int, BiltransToken> bts;
vector<State> alive_states;
map<Node *, double> anfinals;
set<wchar_t> escaped_chars;
State *initial_state;
bool traceMode;
bool debugMode;
bool nullFlush;
bool outOfWord;
unsigned int pos;
unsigned long lineno;
wstring itow(int i);
bool recognisePattern(const wstring lu, const wstring op);
wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2);
// BiltransToken readBiltransToken(FILE *input = stdin);
void makeTransition(int);
void filterFinals();
void evaluateRules();
void processFlush(FILE *output,
map<int, wstring > &sl,
map<int, vector<wstring> > &tl,
map<int, wstring > &blanks,
map<int, pair<double, vector<State> > > &covers,
pair<double, vector<State> > &empty_seq,
map<pair<int, int>, vector<State> > &spans,
int last_final);
void processFlushME(FILE *output,
map<int, wstring > &sl,
map<int, vector<wstring> > &tl,
map<int, wstring > &blanks,
map<int, map<wstring, double> > &scores);
public:
static wstring const LRX_PROCESSOR_TAG_SELECT;
static wstring const LRX_PROCESSOR_TAG_REMOVE;
static wstring const LRX_PROCESSOR_TAG_SKIP;
LRXProcessor();
~LRXProcessor();
void setTraceMode(bool mode);
void setDebugMode(bool mode);
void setNullFlush(bool mode);
void init();
void load(FILE *input);
void process(FILE *input, FILE *output);
void processME(FILE *input, FILE *output);
};
#endif /* __LRX_PROCESSOR_H__ */