-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathx86_decoder.c
305 lines (300 loc) · 10.8 KB
/
x86_decoder.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#include "x86_decoder.h"
#if defined(__x86_64__) || defined(__i386__)
/*
 * Decodes one x86 instruction starting at *ip and advances *ip past it.
 *
 * Only the instruction's length and a few structural bytes are determined;
 * operands are not interpreted.
 *
 * Parameters:
 *   ip          in/out: points at the first byte of the instruction; on
 *               return points at the byte following the decoded instruction.
 *   is64bit     true to decode as 64-bit code (REX prefixes recognised,
 *               8-byte default address width), false for 32-bit code.
 *   has_prefix  optional out: true if at least one prefix byte (legacy or,
 *               in 64-bit mode, REX) preceded the opcode.
 *   rex_ptr     optional out: address of the last REX byte seen in 64-bit
 *               mode, or 0 if none was seen.
 *   mod_rm_ptr  optional out: address of the ModR/M byte, or 0 if the
 *               instruction has none.
 *   sib_ptr     optional out: address of the SIB byte, or 0 if the
 *               instruction has none.
 *   is_group    optional out: true if the opcode's type was resolved through
 *               the group table (i.e. it depends on ModR/M.reg).
 *
 * Returns the opcode: the single opcode byte, or 0x0F00 | second_byte for
 * two-byte (0x0F-escaped) opcodes.
 *
 * Opcodes whose table entry is 0 are not supported. For those, *ip is only
 * advanced past the prefixes and opcode byte(s); no ModR/M, displacement or
 * immediate bytes are consumed.
 */
unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
                         char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
                         bool *is_group) {
  // Flag bits stored in the opcode tables below. An entry of 0 means
  // "unsupported"; every supported entry in the tables has at least one bit
  // set (bit 0 carries no other meaning in this function).
  enum {
    BYTE_OP     = (1<<1),                    // 0x02
    IMM         = (1<<2),                    // 0x04
    IMM_BYTE    = (2<<2),                    // 0x08
    MEM_ABS     = (3<<2),                    // 0x0C
    MODE_MASK   = (7<<2),                    // 0x1C
    MOD_RM      = (1<<5),                    // 0x20
    STACK       = (1<<6),                    // 0x40
    GROUP       = (1<<7),                    // 0x80
    GROUP_MASK  = 0x7F,
  };

  // Entries 0..255 describe one-byte opcodes, entries 256..511 describe
  // two-byte opcodes (0x0F xx). When GROUP is set, the low seven bits are
  // the base index of the opcode's row in group_table.
  static const unsigned char opcode_types[512] = {
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F
    0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67
    0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F
    0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87
    0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97
    0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F
    0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7
    0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF
    0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF
    0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7
    0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF
    0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF
    0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
    0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
    0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
    0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
    0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
    0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
    0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
    0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
  };

  // One row of eight entries per group, indexed by ModR/M.reg. The largest
  // index used below is 40 + 7 + 8 == 55.
  static const unsigned char group_table[56] = {
    0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
    0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
    0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
    0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
    0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
    0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
    0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
  };

  const unsigned char *insn_ptr = (const unsigned char *)*ip;
  int operand_width = 4;
  int address_width = is64bit ? 8 : 4;
  unsigned char byte, rex = 0;
  bool found_prefix = false;

  if (rex_ptr) {
    *rex_ptr = 0;
  }
  if (mod_rm_ptr) {
    *mod_rm_ptr = 0;
  }
  if (sib_ptr) {
    *sib_ptr = 0;
  }

  // Consume prefix bytes. The loop exits through the "default" label with
  // "byte" holding the first opcode byte and insn_ptr pointing just past it.
  for (;; ++insn_ptr) {
    switch (byte = *insn_ptr) {
      case 0x66: // Operand width prefix
        operand_width ^= 6;                    // toggles 4 <-> 2
        break;
      case 0x67: // Address width prefix
        address_width ^= is64bit ? 12 : 6;     // toggles 8 <-> 4 or 4 <-> 2
        break;
      case 0x26: // Segment selector prefixes
      case 0x2e:
      case 0x36:
      case 0x3e:
      case 0x64:
      case 0x65:
      case 0xF0:
      case 0xF2:
      case 0xF3:
        break;
      case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
      case 0x44: case 0x45: case 0x46: case 0x47:
      case 0x48: case 0x49: case 0x4A: case 0x4B:
      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
        if (is64bit) {
          if (rex_ptr) {
            *rex_ptr = (char *)insn_ptr;
          }
          rex = byte;
          found_prefix = true;
          // Skip the "rex = 0" below: this REX stays in effect unless
          // another (non-REX) prefix follows it.
          continue;
        }
        // In 32-bit mode these bytes are ordinary opcodes.
        // fall through
      default:
        ++insn_ptr;
        goto no_more_prefixes;
    }
    // A legacy prefix was consumed; any REX byte seen before it no longer
    // applies.
    // NOTE(review): *rex_ptr is not cleared here, so it can still point at
    // a REX byte that has just been discarded — confirm whether callers
    // rely on that before changing it.
    rex = 0;
    found_prefix = true;
  }

no_more_prefixes:
  if (has_prefix) {
    *has_prefix = found_prefix;
  }
  if (rex & REX_W) {
    operand_width = 8;
  }

  unsigned short insn = byte;
  unsigned int idx = 0;
  if (byte == 0x0F) {
    // Two-byte opcode: use the second half of opcode_types.
    byte = *insn_ptr++;
    insn = (insn << 8) | byte;
    idx = 256;
  }
  unsigned char type = opcode_types[idx + byte];

  bool found_group = false;
  unsigned char mod_rm = 0;
  unsigned char sib = 0;

  if (type & GROUP) {
    // The real type depends on ModR/M.reg. Peek at the ModR/M byte without
    // consuming it; the MOD_RM handling below consumes it if the resolved
    // type has MOD_RM set.
    found_group = true;
    mod_rm = *insn_ptr;
    if (mod_rm_ptr) {
      *mod_rm_ptr = (char *)insn_ptr;
    }
    unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7);
    if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) {
      // Group 7 with a register operand uses the alternate row.
      group += 8;
    }
    type = group_table[group];
  }

  // A zero type means the opcode is not supported. We know that we still
  // don't decode some of the more obscure instructions, but for all
  // practical purposes that doesn't matter. Compilers are unlikely to output
  // them, and even if we encounter hand-coded assembly, we will soon
  // synchronize to the instruction stream again.
  if (type) {
    if (is64bit && (type & STACK)) {
      operand_width = 8;
    }

    if (type & MOD_RM) {
      if (mod_rm_ptr) {
        *mod_rm_ptr = (char *)insn_ptr;
      }
      mod_rm = *insn_ptr++;
      const int mod = (mod_rm >> 6) & 0x3;
      // Only the low three r/m bits matter for the instruction length.
      // REX.B selects a different register but is ignored by the special
      // encodings checked below (SIB escape, disp32/RIP-relative), so it
      // must not be folded into rm here.
      const int rm = mod_rm & 0x7;
      if (mod != 3) {
        if (address_width == 2) {
          // 16-bit addressing: no SIB byte, displacements of 0, 1 or 2
          // bytes.
          switch (mod) {
            case 0:
              if (rm != 6) {
                break;
              }
              // mod == 0, rm == 6: 16-bit displacement only
              // fall through
            case 2:
              insn_ptr++;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        } else {
          if (rm == 4) {
            // r/m == 4 announces a SIB byte.
            if (sib_ptr) {
              *sib_ptr = (char *)insn_ptr;
            }
            sib = *insn_ptr++;
            if (!mod && (sib & 0x7) == 5) {
              // No base register: a 32-bit displacement follows.
              insn_ptr += 4;
            }
          }
          switch (mod) {
            case 0:
              if (rm != 5) {
                break;
              }
              // mod == 0, rm == 5: 32-bit displacement only
              // fall through
            case 2:
              insn_ptr += 3;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        }
      }
    }

    // Opcodes with extra operand bytes that the type flags do not describe.
    switch (insn) {
      case 0xC8: // ENTER
        insn_ptr++;
        // fall through
      case 0x9A: // CALL (far)
      case 0xC2: // RET (near)
      case 0xCA: // LRET
      case 0xEA: // JMP (far)
        insn_ptr += 2;
        break;
      case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
      case 0xF84: case 0xF85: case 0xF86: case 0xF87:
      case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
      case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
        // The displacement is never wider than 32 bits, even when REX.W
        // has raised operand_width to 8.
        insn_ptr += (operand_width == 8) ? 4 : operand_width;
        break;
      default:
        break;
    }

    // Immediate / absolute-address operand.
    switch (type & MODE_MASK) {
      case IMM:
        if (!(type & BYTE_OP)) {
          switch (insn) {
            case 0xB8: case 0xB9: case 0xBA: case 0xBB:
            case 0xBC: case 0xBD: case 0xBE: case 0xBF:
              // MOV reg, imm is the one form that takes a full 64-bit
              // immediate.
              insn_ptr += operand_width;
              break;
            default:
              // All other immediates are at most 32 bits wide.
              insn_ptr += (operand_width == 8) ? 4 : operand_width;
              break;
          }
          break;
        }
        // Byte-sized operation: one immediate byte.
        // fall through
      case IMM_BYTE:
        insn_ptr++;
        break;
      case MEM_ABS:
        insn_ptr += address_width;
        break;
      default:
        break;
    }
  }

  if (is_group) {
    *is_group = found_group;
  }
  *ip = (const char *)insn_ptr;
  return insn;
}
#endif