-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_functions_utils.py
321 lines (267 loc) · 13.3 KB
/
parse_functions_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import re, os.path, cxxfilt, glob, codecs, config_parser
from timeit import timeit
# https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/design.html
# http://docs.oracle.com/javase/specs/jls/se7/html/jls-4.html#jls-4.2
# Type-name tables used to classify a function's return type by width.

# Java primitive types, grouped by size.
Java_types64 = ['long', 'double']  # 64 bits
Java_types32 = [
    'int', 'byte', 'short', 'char', 'float', 'boolean',
]  # 32 bits
void_type = ['void']

# C/C++ and JNI type spellings treated as 32-bit.
# The `unsigned` qualifier is not taken into account separately.
C_types32 = [
    'int32_t', 'int16_t', 'int8_t', 'int', 'unsigned',
    'float', 'char', 'bool', 'long int', 'wchar_t',
    'jbyte', 'jchar', 'jboolean', 'jshort', 'jint', 'jfloat', 'size_t',
]
# JNI reference types.  todo add other Arrays
jni_objects_types = ['jclass', 'jstring', 'jcharArray']
# C/C++ and JNI type spellings treated as 64-bit.
C_types64 = ['long long', 'double', 'int64_t', 'jlong', 'jdouble']
# C type spellings treated as 128-bit.
C_types128 = ['long double']
JNI_types = ['']
def init_config(config):
    """Load the search locations from *config* into module-level globals.

    Sets three globals read by the other functions in this module:

    * ``java_dir``     - value of the ``'java_directory'`` option
    * ``jni_dir``      - value of the ``'native_directory'`` option
    * ``system_paths`` - content of the ``'system_directories'`` section

    :param config: object exposing ``get(name)`` and ``getSection(name)``
        (presumably the project's ``config_parser`` wrapper - confirm).
    """
    global java_dir, jni_dir, system_paths

    java_dir = config.get('java_directory')  # todo set directory for java
    jni_dir = config.get('native_directory')
    # jnih_path = '/home/daria/Android/Sdk/ndk-bundle/sysroot/usr/include/jni.h'
    system_paths = config.getSection('system_directories')
#for native functions declared in Java code
def getJavafunctionType(JNI_function_name):
    """Find the Java return type of a native method from its JNI symbol name.

    The symbol ``Java_<pkg>_<Class>_<method>[__<mangled args>]`` is split into
    a source path (``java_dir`` + ``<pkg>/<Class>.java``) and a method name;
    the ``.java`` file is then searched for a ``native ... <method>``
    declaration and the token preceding the method name is returned.

    :param JNI_function_name: JNI symbol name (converted with ``str``).
    :return: ``-1`` if the name does not start with ``'Java'``; ``None`` if
        the source file or the declaration cannot be found (a message is
        printed in both cases); otherwise the return type as written in the
        declaration, with ``type []`` normalised to ``type[]``.
    """
    function_name = str(JNI_function_name)
    if not function_name.startswith('Java'):
        return -1  # not a valid JNI function name

    # todo: restore the remaining escaped characters (_2, _3, ...)
    params = ''
    if '__' in function_name:
        # Overloaded native method: the argument signature follows '__'.
        params = parseJavaFuncParams(function_name.split('__')[-1])
        # NOTE(review): only entries containing '[]' are kept here, every
        # other parameter is dropped - confirm this filter is intended.
        params = [p[:-2] + r'(.|\n)*\[\](.|\n)*' for p in params if '[]' in p]
        params_regex = r'(.|\n)*,\s*'.join(params)

    # '_' separates path components; '_1' is the escape for a literal '_'.
    function_name = function_name.split('__')[0].replace('_', ' ').replace(' 1', '_')
    splitted_path = function_name.split(' ')
    short_func_name = splitted_path[-1]
    if params != '':
        short_func_name += r'(.|\n)*\((.|\n)*' + params_regex + r'(.|\n)*\)'

    # Drop the leading 'Java' and the trailing method name.
    path = java_dir + '/'.join(splitted_path[1:-1]) + '.java'
    if not os.path.isfile(path):
        print('NO FILE {0}!'.format(path))
        return None  # todo

    with codecs.open(path, "r", encoding='utf-8', errors='ignore') as f:
        data = f.read()

    func_declaration = re.search('native .* {0}'.format(short_func_name), data)
    if func_declaration is None:
        print('NONE DECLARATION in {0} for {1}'.format(path, short_func_name))
        return None  # todo

    # All Java types are single words and there are no pointers, so the
    # return type is the token right before the method name.
    tokens = func_declaration.group().split('(')[0].split(' ')  # drop the parameters
    return_type = tokens[-2]
    if return_type == '[]' and len(tokens) >= 3:
        # Declaration written as 'type [] name' (with a space): glue the
        # element type and the brackets back together.
        return_type = tokens[-3] + tokens[-2]
    return return_type
def getCFunctionType(func_name):
    """Extract the first known type spelling from a C/C++ declaration prefix.

    :param func_name: text of a declaration up to (not including) its
        parameter list, or ``''`` when no declaration was found.
    :return: ``None`` for an empty *func_name* (no function, so the type
        cannot be determined); ``''`` when none of the known type spellings
        occurs in the text; otherwise the matched text - a type spelling
        optionally followed by ``*`` - with surrounding whitespace removed.
    """
    if func_name == '':
        return None

    # Every spelling from the 128/64/32-bit tables, 'void', the bare pointer
    # and array markers, and the JNI reference types.
    alternatives = (C_types128 + C_types64 + C_types32 + void_type
                    + [r'\*', r'\[\]'] + jni_objects_types)
    type_regex = r'({0})\s*\*?\s*'.format('|'.join(alternatives))

    found = re.search(type_regex, func_name)
    # A declaration without any known spelling yields the empty string.
    return '' if found is None else found.group().strip()
def getTypeSize(type, isJNI):
    """Map a return-type spelling to its size code.

    Size codes (presumably counted in 32-bit words - confirm with callers):
    ``0`` for ``void``, ``1`` for 32-bit types, pointers, arrays and JNI
    reference types, ``2`` for 64-bit types, ``4`` for 128-bit types and for
    anything unknown.

    :param type: type spelling, ``''`` when the declaration had no known
        type, ``None`` when the function was not found, or a non-string
        sentinel such as ``-1``.
    :param isJNI: true to classify with the Java type tables, false to use
        the C/C++ tables.
    :return: one of ``0``, ``1``, ``2``, ``4``.
    """
    # Function not found (None) or an error sentinel such as -1: the type
    # may be anything.  Checking this first also keeps the substring tests
    # below from raising TypeError on non-string values.
    if not isinstance(type, str):
        return 4
    if not isJNI and type in C_types128:
        return 4
    if (isJNI and type in Java_types64) or (not isJNI and type in C_types64):
        return 2
    if ('*' in type or '[]' in type
            or (isJNI and type in Java_types32)
            or (not isJNI and type in C_types32)
            or type in jni_objects_types):
        return 1
    # 'void' must be tested after '*': 'void*' is a 32-bit pointer.
    if type == 'void':
        return 0
    return 4
def getFunctionsReturnTypeSize(functions, config):
    """Compute a return-size code for every function in *functions*.

    :param functions: mapping ``address -> function name``; names starting
        with ``'Java'`` are handled as JNI entry points, empty names are
        functions without a known name, everything else is treated as a
        (possibly mangled) C/C++ symbol.
    :param config: configuration object forwarded to ``init_config``.
    :return: dict ``address -> size code`` as produced by ``getTypeSize``.
    """
    init_config(config)

    # address -> type found for the function ('' until something is found)
    function_types = dict.fromkeys(functions.keys(), '')
    functions = dict(functions)  # address -> function name

    # JNI entry points: take the type from the Java declaration.
    for address, name in functions.items():
        if name.startswith('Java'):
            function_types[address] = getJavafunctionType(name)

    # Size codes for the JNI functions resolved so far.
    return_sizes = {address: getTypeSize(found, True)
                    for address, found in function_types.items()
                    if found != ''}

    # C/C++ functions: demangle the named, non-JNI symbols.
    C_functions = {}
    for address, name in functions.items():
        if name.startswith('Java') or name == '':
            continue
        C_functions[address] = demangleNativeFunctionName(name)

    # Look the declarations up in the .h/.c(pp) files and keep only the part
    # before the parameter list when extracting the type.
    for address, declaration in findFunctionsInFiles(C_functions).items():
        function_types[address] = getCFunctionType(declaration.split('(')[0])

    # Everything not sized as JNI is sized with the C/C++ tables.
    for address, found in function_types.items():
        if address not in return_sizes:
            return_sizes[address] = getTypeSize(found, False)

    return return_sizes
def demangleNativeFunctionName(function):
    """Return a readable name for a native symbol.

    Symbols starting with ``'_Z'`` are demangled with ``cxxfilt``; a name
    that then starts with ``'_JNIEnv'`` has its first nine characters (the
    length of ``'_JNIEnv::'``) removed.

    :param function: symbol name.
    :return: demangled name, or the name unchanged when it is not mangled or
        cannot be demangled.
    """
    if function.startswith('_Z'):
        try:
            function = cxxfilt.demangle(function)
        except cxxfilt.InvalidName:
            # Looks mangled but is not a valid mangled name: keep the raw
            # symbol instead of aborting the whole analysis.
            pass
    if function.startswith('_JNIEnv'):
        function = function[9:]
    return function
def searchInFile(patterns_dict, func_dict, file):
    """Search one file for the declarations of the given functions.

    :param patterns_dict: mapping ``address -> regex`` (string or compiled).
    :param func_dict: mapping ``address -> function signature``; the part
        before the first ``'('`` is used as a cheap substring pre-check
        before the regex is run.
    :param file: path of the file to read (UTF-8, undecodable bytes are
        ignored).
    :return: dict ``address -> matched text`` containing only the addresses
        whose pattern matched, ordered like *patterns_dict*.
    """
    with codecs.open(file, "r", encoding='utf-8', errors='ignore') as handle:
        text = handle.read()

    hits = {}
    for address, signature in func_dict.items():
        bare_name = signature.split('(')[0]
        if bare_name not in text:
            continue  # the name does not even occur, skip the regex
        match = re.search(patterns_dict[address], text)
        if match is not None:
            hits[address] = match.group()

    # Report in the key order of patterns_dict, leaving out empty results.
    return {address: hits[address]
            for address in patterns_dict
            if hits.get(address, '') != ''}
# Fixed-width integer aliases and the plain C spelling each one stands for.
types_equals = {
    'uint32': 'unsigned int',
    'uint64': 'unsigned long long',
    'uint32_t': 'unsigned int',
    'uint64_t': 'unsigned long long',
    'int32': 'int',
    'int64': 'long long',
    'int32_t': 'int',
    'int64_t': 'long long',
}

# For a parameter type as it appears in a function signature: a regex
# alternation of the spellings (fixed-width and JNI aliases) accepted for it
# when matching a declaration in source code.
types_patterns = {
    'int': '(int32(_t)?)|(j?int)|(j?size)',
    'unsigned int': 'uint32(_t)?',
    'long long': '(int64(_t)?)|(j?long( long)?)',
    'unsigned long long': 'uint64(_t)?',
    'unsigned char': 'jboolean|(uint8(_t)?)|bool',
    'signed char': '(j?byte)|int8(_t)?',
    'short': '(j?short)|int16(_t)?',
    'float': 'j?float',
    'unsigned short': 'jchar',
    'double': 'jdouble',
}
#pattern for C/C++ functions
#for jni functions in native code no need to think about params (they are in the name already)
def makePattern(func):
    """Build a regex that finds the declaration/definition of *func* in source.

    :param func: function signature, e.g. ``'foo(int, char*)'``, or a bare
        name without a parameter list.
    :return: compiled pattern (``re.MULTILINE``) matching, from a line start:
        up to three leading specifier/type tokens, an optional ``*``, the
        function name, a parameter list compatible with the parameter types
        of *func*, an optional trailing qualifier word, and a closing ``;``
        or ``{``.
    """
    params = re.search(r'\((.|\n)*\)', func)
    params_regex = '[^;]*'  # no parameter list given: accept anything up to ';'
    if params is not None:  # the signature has a parameter list
        fragments = []
        for raw_param in params.group()[1:-1].split(','):
            p = raw_param.strip()
            p = p.replace(' const', '')  # const may change position, drop it
            if p.startswith('_j') and p[-1] == '*':  # _jobject* -> jobject
                p = p[1:-1]
            if p.startswith('_J'):
                p = p[1:]
            # Parameter type without reference/pointer decoration.
            non_escaped = p.strip('&').strip('*').strip(' ')
            # todo unsigned int -> unsigned// int
            if non_escaped in types_patterns:
                # Group the alternation so that its '|' cannot split the
                # surrounding const/whitespace wrapper.
                p = '(?:' + types_patterns[non_escaped] + ')'
            else:
                p = re.escape(p)
            # todo make this regexp more effective
            p = r'(\s*(const)?\s*' + p + r'\s*(const)?\s*)'
            # Allow whitespace around pointer and reference markers.
            fragments.append(p.replace('\\*', '\\s*\\*\\s*')
                              .replace('\\&', '\\s*\\&\\s*'))
        params_regex = r'(.|\n)*\s*'.join(fragments)  # todo escape?

    # JNI types appear without the leading underscore in source code.
    params_regex = params_regex.replace('_J', 'J').replace('_j', 'j')

    name_regex = re.escape(func.split('(')[0])
    return re.compile(
        r'\n\s*([a-zA-Z0-9_"*\[\]]+\s+){0,3}'
        r'\*?' + name_regex + r'\s*\(' + params_regex
        + r'.*\)(\s[a-zA-Z_]+)?\s*(;|{)',
        re.MULTILINE)
# Open each candidate file and search it for all requested functions.
def findFunctionsInFiles(functions):
    """Locate the declarations of *functions* in the native and system sources.

    Files are scanned in this order: ``*.h`` then ``*.c*`` below ``jni_dir``,
    then ``*.h``, ``*.c`` and ``*.cpp`` below every entry of
    ``system_paths``.  A function is searched for only until its first match.

    :param functions: dict ``address -> function signature``.  Entries that
        are found are removed from this dict in place, so on return it holds
        only the functions that were not found.
    :return: dict ``address -> matched declaration text``; ``''`` for
        functions that were not found.
    """
    result = dict.fromkeys(functions, '')  # address -> found declaration
    # address -> pattern for every function that is still being searched
    patterns = {address: makePattern(func) for address, func in functions.items()}

    def find(path):
        """Scan every file matching the glob *path* for the pending functions."""
        for file in glob.iglob(path, recursive=True):
            if not patterns:
                break  # everything has been found, no need to read more files
            if not os.path.isfile(file):
                continue  # the glob can also match directories
            found_func = searchInFile(patterns, functions, file)
            result.update(found_func)
            for address in found_func:
                functions.pop(address)
                patterns.pop(address)

    find(jni_dir + '/**/*.h')
    find(jni_dir + '/**/*.c*')
    for p in system_paths:
        find(p + '/**/*.h')
        find(p + '/**/*.c')
        find(p + '/**/*.cpp')
    return result
# Escape sequences inside a JNI-mangled class name: '_0xxxx' is a Unicode
# character given as four hex digits, '_1' is a literal '_', and any other
# '_' stands for the '/' package separator.
_JNI_NAME_ESCAPE = re.compile(r'_0([0-9a-fA-F]{4})|_1|_')


def _decodeJniClassName(mangled):
    """Turn a JNI-mangled class name into its 'pkg/sub/Class' form."""
    def unescape(match):
        if match.group(1) is not None:
            return chr(int(match.group(1), 16))
        return '_' if match.group() == '_1' else '/'
    return _JNI_NAME_ESCAPE.sub(unescape, mangled)


def parseJavaFuncParams(params):
    """Decode the mangled argument signature of an overloaded JNI symbol.

    *params* is the part of the symbol after ``'__'``: primitive type codes
    (``Z B C S I J F V D``), class references ``L<mangled name>_2`` and the
    array marker ``_3``.

    :param params: mangled argument signature, e.g.
        ``'ILjava_lang_String_2_3B'``.
    :return: list with one entry per parameter, in order: the Java keyword
        for a primitive, the simple class name (package removed) for a class
        reference, each prefixed with one ``'['`` per array dimension.  An
        empty signature gives an empty list.
    """
    signatures = {'Z': 'boolean', 'B': 'byte', 'C': 'char', 'S': 'short',
                  'I': 'int', 'J': 'long', 'F': 'float', 'V': 'void',
                  'D': 'double'}
    parsed = []
    dimensions = 0  # number of '_3' array markers seen for the next type
    position = 0
    length = len(params)
    while position < length:
        if params.startswith('_3', position):
            dimensions += 1
            position += 2
            continue
        code = params[position]
        if code == 'L':
            # Class reference: runs up to the '_2' that encodes ';'.  A Java
            # identifier cannot start with a digit, so '_2' inside the name
            # is never ambiguous.
            end = params.find('_2', position)
            if end == -1:
                end = length  # unterminated reference: take the rest
            class_name = _decodeJniClassName(params[position + 1:end])
            # Keep only the class name itself, without the package.
            parsed.append('[' * dimensions + class_name.split('/')[-1])
            dimensions = 0
            position = end + 2
        elif code in signatures:
            parsed.append('[' * dimensions + signatures[code])
            dimensions = 0
            position += 1
        else:
            position += 1  # not part of a valid signature, skip it
    return parsed