This repository has been archived by the owner on Jun 13, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
global.h
304 lines (258 loc) · 9.67 KB
/
global.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
// global.h
/* KANJI is defined with no value, so it can only be tested with #ifdef/#if defined.
 * NOTE(review): presumably it enables the multi-byte (kanji) code paths in the
 * sources that include this header; no use of it is visible here. */
#define KANJI
/* Earlier definition, kept for reference: it tested the ROPT_KANJI bit of
 * _prog->reganch instead of a per-context flag. */
//#define KANJI_MODE (_prog->reganch & ROPT_KANJI)
/* KANJI_MODE reads kanji_mode_flag, which a later macro in this header rewrites
 * to globalp->kanji_mode_flag; a `globalp` must be in scope where it is used. */
#define KANJI_MODE kanji_mode_flag
/*
 * ANY: one slot that can hold a pointer or an integer.
 * All members share the same storage; the GLOBAL structure uses an array of
 * these as its save stack.
 */
typedef union any {
    void *any_ptr;   /* generic pointer payload */
    int   any_i32;   /* int payload */
    long  any_iv;    /* long payload */
    long  any_long;  /* long payload (same type as any_iv) */
} ANY;
/**************************************************************************/
/* This regexp stuff is global since it always happens within 1 expr eval */
/**************************************************************************/
/*
 * CURCUR: descriptor for the curly ({n,m}) construct currently being matched.
 * Descriptors chain through `oldcc`, which points at the descriptor that was
 * current before this one was started.
 */
typedef struct curcur {
    int   parenfloor;       /* how far back to strip paren data */
    int   cur;              /* number of instances of `scan` matched so far */
    int   min;              /* minimum number of scans that must match */
    int   max;              /* maximum number of scans that may match */
    int   minmod;           /* whether to work our way up or down */
    char *scan;             /* the thing to match */
    char *next;             /* what has to match after it */
    char *lastloc;          /* where matching of this scan started */
    struct curcur *oldcc;   /* curly descriptor in effect before this one */
} CURCUR;
/*
 * GLOBAL: the regexp compiler/matcher working state, gathered into a single
 * structure. The accessor macros that follow this definition expose every
 * member under its bare name through a pointer called `globalp`.
 */
typedef struct global {
    /* --- save stack --- */
    ANY *savestack;                 /* to save non-local values on */
    int  savestack_ix;
    int  savestack_max;

    /* --- compile-time state --- */
    char *regprecomp;               /* uncompiled string. */
    char *regparse;                 /* Input-scan pointer. */
    char *regxend;                  /* End of input for compile */
    int   regnpar;                  /* () count. */
    char *regcode;                  /* Code-emit pointer; &regdummy = don't. */
    int   regsize;                  /* Code size. */
    int   regnaughty;               /* How bad is this pattern? */
    int   regsawback;               /* Did we see \1, ...? */

    /* --- match-time state --- */
    char  *reginput;                /* String-input pointer. */
    char  *regbol;                  /* Beginning of input, for ^ check. */
    char  *regeol;                  /* End of input, for $ check. */
    char **regstartp;               /* Pointer to startp array. */
    char **regendp;                 /* Ditto for endp. */
    int   *reglastparen;            /* Similarly for lastparen. */
    char  *regtill;                 /* How far we are required to go. */
    int    regflags;                /* are we folding, multilining? */
    char   regprev;                 /* char before regbol, \n if none */

    /* --- regcomp part --- */
    unsigned char char_bitmap[32];  /* 32 bytes = 256 bits; presumably one bit per byte value */

    /* --- current regexp pointer --- */
    char *regexp_current;

    /* --- regexec.cpp and sv.cpp part --- */
    int multiline;

    /* --- regexec.cpp --- */
    int     kanji_mode_flag;        /* value read by the KANJI_MODE macro */
    CURCUR *regcc;                  /* current curly descriptor */
} GLOBAL;
/*
 * Shorthand accessors: each macro rewrites a bare identifier into the
 * same-named member of the GLOBAL structure reached through `globalp`.
 * A pointer named `globalp` must therefore be in scope wherever one of these
 * names is used; this header does not declare it.
 *
 * NOTE(review): because every macro name equals a member name, code that
 * follows this point cannot access these members through any other pointer:
 * `other->regparse` would expand to `other->globalp->regparse`.
 */
#define savestack globalp->savestack
#define savestack_ix globalp->savestack_ix
#define savestack_max globalp->savestack_max
#define regprecomp globalp->regprecomp
#define regparse globalp->regparse
#define regxend globalp->regxend
#define regnpar globalp->regnpar
#define regcode globalp->regcode
#define regsize globalp->regsize
#define regnaughty globalp->regnaughty
#define regsawback globalp->regsawback
#define reginput globalp->reginput
#define regbol globalp->regbol
#define regeol globalp->regeol
#define regstartp globalp->regstartp
#define regendp globalp->regendp
#define reglastparen globalp->reglastparen
#define regtill globalp->regtill
#define regflags globalp->regflags
#define regprev globalp->regprev
#define char_bitmap globalp->char_bitmap
#define multiline globalp->multiline
#define regexp_current globalp->regexp_current
#define regcc globalp->regcc
#define kanji_mode_flag globalp->kanji_mode_flag
/**************************************************************************/
/* end of global */
/**************************************************************************/
/*
 * ASCII-only character classification and case conversion.
 *
 * Every predicate first checks isascii(c), so the <ctype.h> classifier is
 * only consulted for values in the 7-bit range; anything else yields 0.
 * isALNUM and isIDFIRST additionally accept '_'.
 *
 * The argument is parenthesized wherever an operator is applied to it, so
 * expression arguments such as `cond ? a : b` classify the value of the whole
 * expression. Arguments are still evaluated more than once: do not pass
 * expressions with side effects (e.g. `*p++`).
 *
 * toUPPER/toLOWER forward to toupper/tolower unchanged (no isascii guard).
 */
#define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || (c) == '_'))
#define isIDFIRST(c) (isascii(c) && (isalpha(c) || (c) == '_'))
#define isALPHA(c) (isascii(c) && isalpha(c))
#define isSPACE(c) (isascii(c) && isspace(c))
#define isDIGIT(c) (isascii(c) && isdigit(c))
#define isUPPER(c) (isascii(c) && isupper(c))
#define isLOWER(c) (isascii(c) && islower(c))
#define toUPPER(c) toupper(c)
#define toLOWER(c) tolower(c)
/*
 * ANYOF_* item codes (128..131).
 * NOTE(review): presumably these tag the items stored in the operand of an
 * ANYOF node; the encoding itself lives in the compiler/matcher sources, which
 * are not visible from this header.
 */
/* definition number opnd? meaning */
#define ANYOF_EXACT 128 /* sv */
#define ANYOF_FROMTO 129 /* sv */
#define ANYOF_ENDMARK 130 /* sv */
#define ANYOF_COMPL 131 /* sv */
/*
 * ROPT_* option bits: four distinct single-bit values (1, 2, 4, 8).
 * The commented-out KANJI_MODE definition in this header tests ROPT_KANJI
 * against `_prog->reganch`, so these are presumably flags for that field.
 */
#define ROPT_KANJI 8
#define ROPT_ANCH 1
#define ROPT_SKIP 2
#define ROPT_IMPLICIT 4
/*
 * Regex program opcodes, numbered densely from 0 (END) to 34 (WHILEM).
 * The regkind[] table in this header has exactly one entry per opcode in this
 * same order, so adding, removing or renumbering an opcode requires the
 * matching change there.
 *
 * NOTE(review): `#define ANY 7` reuses the name of the `ANY` typedef
 * (union any) declared earlier in this header. From this line on the
 * identifier ANY expands to 7, so the union type has to be spelled
 * `union any` in any code that follows.
 *
 * The "opnd?" column gives the operand kind noted by the original author
 * (no / sv / node / num).
 */
/* definition number opnd? meaning */
#define END 0 /* no End of program. */
#define BOL 1 /* no Match "" at beginning of line. */
#define MBOL 2 /* no Same, assuming multiline. */
#define SBOL 3 /* no Same, assuming singleline. */
#define EOL 4 /* no Match "" at end of line. */
#define MEOL 5 /* no Same, assuming multiline. */
#define SEOL 6 /* no Same, assuming singleline. */
#define ANY 7 /* no Match any one character (except newline). */
#define SANY 8 /* no Match any one character. */
#define ANYOF 9 /* sv Match character in (or not in) this class. */
#define CURLY 10 /* sv Match this simple thing {n,m} times. */
#define CURLYX 11 /* sv Match this complex thing {n,m} times. */
#define BRANCH 12 /* node Match this alternative, or the next... */
#define BACK 13 /* no Match "", "next" ptr points backward. */
#define EXACTLY 14 /* sv Match this string (preceded by length). */
#define NOTHING 15 /* no Match empty string. */
#define STAR 16 /* node Match this (simple) thing 0 or more times. */
#define PLUS 17 /* node Match this (simple) thing 1 or more times. */
#define ALNUM 18 /* no Match any alphanumeric character */
#define NALNUM 19 /* no Match any non-alphanumeric character */
#define BOUND 20 /* no Match "" at any word boundary */
#define NBOUND 21 /* no Match "" at any word non-boundary */
#define SPACE 22 /* no Match any whitespace character */
#define NSPACE 23 /* no Match any non-whitespace character */
#define DIGIT 24 /* no Match any numeric character */
#define NDIGIT 25 /* no Match any non-numeric character */
#define REF 26 /* num Match some already matched string */
#define OPEN 27 /* num Mark this point in input as start of #n. */
#define CLOSE 28 /* num Analogous to OPEN. */
#define MINMOD 29 /* no Next operator is not greedy. */
#define GBOL 30 /* no Matches where last m//g left off. */
#define IFMATCH 31 /* no Succeeds if the following matches. */
#define UNLESSM 32 /* no Fails if the following matches. */
#define SUCCEED 33 /* no Return from a subroutine, basically. */
#define WHILEM 34 /* no Do curly processing and see if rest matches. */
/*
* Opcode notes:
*
* BRANCH The set of branches constituting a single choice are hooked
* together with their "next" pointers, since precedence prevents
* anything being concatenated to any individual branch. The
* "next" pointer of the last BRANCH in a choice points to the
* thing following the whole choice. This is also where the
* final "next" pointer of each individual branch points; each
* branch starts with the operand node of a BRANCH node.
*
* BACK Normal "next" pointers all implicitly point forward; BACK
* exists to make loop structures possible.
*
* STAR,PLUS '?', and complex '*' and '+', are implemented as circular
* BRANCH structures using BACK. Simple cases (one character
* per match) are implemented with STAR and PLUS for speed
* and to minimize recursive plunges.
*
* OPEN,CLOSE ...are numbered at compile time.
*/
/*
 * regkind: positional table with one entry per opcode (35 entries, END..WHILEM).
 * Entry N holds the opcode "kind" that opcode number N is grouped under; the
 * trailing comment on each line names the opcode that owns the slot.
 * NOTE(review): presumably indexed as regkind[OP(node)] by the matcher.
 *
 * Being `static` in a header, every translation unit that includes this file
 * gets its own private copy of the table.
 */
static char regkind[] = {
END, /* 0 END */
BOL, /* 1 BOL */
BOL, /* 2 MBOL */
BOL, /* 3 SBOL */
EOL, /* 4 EOL */
EOL, /* 5 MEOL */
EOL, /* 6 SEOL */
ANY, /* 7 ANY */
ANY, /* 8 SANY */
ANYOF, /* 9 ANYOF */
CURLY, /* 10 CURLY */
CURLY, /* 11 CURLYX */
BRANCH, /* 12 BRANCH */
BACK, /* 13 BACK */
EXACTLY, /* 14 EXACTLY */
NOTHING, /* 15 NOTHING */
STAR, /* 16 STAR */
PLUS, /* 17 PLUS */
ALNUM, /* 18 ALNUM */
NALNUM, /* 19 NALNUM */
BOUND, /* 20 BOUND */
NBOUND, /* 21 NBOUND */
SPACE, /* 22 SPACE */
NSPACE, /* 23 NSPACE */
DIGIT, /* 24 DIGIT */
NDIGIT, /* 25 NDIGIT */
REF, /* 26 REF */
OPEN, /* 27 OPEN */
CLOSE, /* 28 CLOSE */
MINMOD, /* 29 MINMOD */
BOL, /* 30 GBOL */
BRANCH, /* 31 IFMATCH */
BRANCH, /* 32 UNLESSM */
END, /* 33 SUCCEED */
WHILEM /* 34 WHILEM */
};
/*
* A node is one char of opcode followed by two chars of "next" pointer.
* "Next" pointers are stored as two 8-bit pieces, high order first. The
* value is a positive offset from the opcode of the node containing it.
* An operand, if any, simply follows the node. (Note that much of the
* code generation knows about this implicit relationship.)
*
* Using two bytes for the "next" pointer is vast overkill for most things,
* but allows patterns to get big without disasters.
*
* [If REGALIGN is defined, the "next" pointer is always aligned on an even
* boundary, and reads the offset directly as a short. Also, there is no
* special test to reverse the sign of BACK pointers since the offset is
* stored negative.]
*/
/*
 * Node accessors. `p` is a char pointer to the opcode byte of a node.
 *
 *   OP(p)        the opcode byte itself (lvalue)
 *   NEXT(p)      the "next" offset, accessed as a short at p+1 (lvalue)
 *   ARG1(p)      unsigned short at p+3 (lvalue)
 *   ARG2(p)      unsigned short at p+5 (lvalue)
 *   OPERAND(p)   address p+3
 *   NEXTOPER(p)  address p+4
 *   PREVOPER(p)  address p-4
 *
 * REGALIGN is defined (with no value): offsets are read and written directly
 * as shorts, so the caller must keep p+1 suitably aligned for a short.
 *
 * The argument is fully parenthesized in every macro so that expression
 * arguments (e.g. `cond ? a : b`) have the offset applied to the whole
 * expression rather than to its last operand.
 *
 * MAGIC is octal 0234 (decimal 156).
 */
#define REGALIGN
#define OP(p) (*(p))
#define NEXT(p) (*(short*)((p)+1))
#define ARG1(p) (*(unsigned short*)((p)+3))
#define ARG2(p) (*(unsigned short*)((p)+5))
#define OPERAND(p) ((p) + 3)
#define NEXTOPER(p) ((p) + 4)
#define PREVOPER(p) ((p) - 4)
#define MAGIC 0234
/*
* Utility definitions.
*/
/*
 * UCHARAT(p): the character at p as an int.
 * With CHARMASK defined, the character is masked with CHARMASK; otherwise it
 * is read through an unsigned char pointer, so the result is never negative.
 */
#ifdef CHARMASK
#define UCHARAT(p) ((int)*(p)&CHARMASK)
#else
#define UCHARAT(p) ((int)*(unsigned char *)(p))
#endif
/* FAIL(c) raises `c` with a C++ throw expression, so any code that uses it
 * must be compiled as C++. */
#define FAIL(c) throw(c)
// 2006/06/24 Karoto: defined so that no errors occur in environments without MFC
/* The TRACE macros expand to nothing: their arguments are discarded by the
 * preprocessor and never evaluated. */
#define TRACE(x)
#define TRACE0(x)
#define TRACE1(x, y)
#define TRACE2(x, y, z)
/*
 * PMf_* pattern-match flags. Each constant is a distinct single bit, from
 * 0x0001 up to 0x100000, so they can be OR-ed together. The last five values
 * (0x010000 and above) do not fit in 16 bits, so the variable holding these
 * flags must be wider than that.
 * NOTE(review): where the flag word is stored is not visible from this header.
 */
#define PMf_USED 0x0001 /* pm has been used once already */
#define PMf_ONCE 0x0002 /* use pattern only once per reset */
#define PMf_SCANFIRST 0x0004 /* initial constant not anchored */
#define PMf_ALL 0x0008 /* initial constant is whole pat */
#define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */
#define PMf_FOLD 0x0020 /* case insensitivity */
#define PMf_CONST 0x0040 /* subst replacement is constant */
#define PMf_KEEP 0x0080 /* keep 1st runtime pattern forever */
#define PMf_GLOBAL 0x0100 /* pattern had a g modifier */
#define PMf_RUNTIME 0x0200 /* pattern coming in on the stack */
#define PMf_EVAL 0x0400 /* evaluating replacement as expr */
#define PMf_WHITE 0x0800 /* pattern is \s+ */
#define PMf_MULTILINE 0x1000 /* assume multiple lines */
#define PMf_SINGLELINE 0x2000 /* assume single line */
#define PMf_KANJI 0x4000 /* KANJI mode */
#define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */
#define PMf_SUBSTITUTE 0x010000 /* substitute */
#define PMf_TRANSLATE 0x020000 /* translate */
#define PMf_TRANS_COMPLEMENT 0x040000 /* translate complement */
#define PMf_TRANS_DELETE 0x080000 /* translate delete */
#define PMf_TRANS_SQUASH 0x100000 /* translate squash */