From e885db613133967840e59348e8205d66b81509df Mon Sep 17 00:00:00 2001 From: DoroninaD Date: Tue, 31 Oct 2017 00:03:16 +0700 Subject: [PATCH] Objdump -> IDA --- IdaHelper.py | 311 +++++++++++++++++++++++++++++++++++++++++++++++ ObjdumpHelper.py | 148 ++++++++++++++++++++++ arm_translate.py | 2 +- main_switcher.py | 214 ++++++++++++++++++++++++++++++++ parse.py | 119 ++++++++++++------ switcher.py | 62 ++++++++++ utils.py | 7 +- 7 files changed, 820 insertions(+), 43 deletions(-) create mode 100644 IdaHelper.py create mode 100644 ObjdumpHelper.py create mode 100755 main_switcher.py create mode 100644 switcher.py diff --git a/IdaHelper.py b/IdaHelper.py new file mode 100644 index 0000000..772bad8 --- /dev/null +++ b/IdaHelper.py @@ -0,0 +1,311 @@ +import re, switcher, utils, ast, cxxfilt + +pushPatter = re.compile('(PUSH|(STM.*\s+SP!)).*{.*}', re.IGNORECASE) +popPattern = re.compile('(POP|(LDM.*\s+SP!)).*{.*}', re.IGNORECASE) +pushpopLastRegs = ['lr','pc'] +varsDict = {} + +def readLines(lines): + getVars(lines) + return [l for l in lines if '.text' in l] + + +def getPush(group): + return [g for g in group if pushPatter.search(g.line)] + +def getPops(group): + return [g for g in group if popPattern.search(g.line)] + +def getFunctions(lines): + funcs,i = [],0 + funcDict = {} + startMark, endMark = re.compile('S\s*U\s*B\s*R\s*O\s*U\s*T\s*I\s*N\s*E'), \ + 'End\s*of\s*function\s*' + while i < len(lines): + if not '.text' in lines[i]: + i+=1 + continue + if startMark.search(lines[i]): + funcAddr = getAddress(lines[i]) + rows = [] + i += 1 + # ищем имя функции + while True: + # name, addr = getFuncName(lines[i]), getAddress(lines[i]) + name = getFuncName(lines[i]) + i+=1 + if name: + funcDict[funcAddr] = name + break + + #thisFuncEndMark = re.compile(endMark+re.escape(cxxfilt.demangle(name))) + thisFuncEndMark = re.compile(endMark) + while not thisFuncEndMark.search(lines[i]): + l, bytes = lines[i], getBytes(lines[i]) + if not bytes: + i+=1 + continue + # line addr bytes thumb regs reg name + rows.append(switcher.rowModel(l, getAddress(l), bytes, isThumb(bytes,l), None, None, None, funcAddr)) + i+=1 + funcs.append(rows) + i+=1 + i+=1 + return funcs, funcDict + + +def getFuncName(line): + nameMatch = re.search('.text:[0-9a-fA-F]+\s*(WEAK)?(EXPORT)?\s[a-zA-Z_\-0-9]+',line,re.IGNORECASE) + if not nameMatch: + return None + return nameMatch.group().split(' ')[-1] + + +def getAddress(line): + addrMatch = re.search('.text:[0-9a-fA-F]+',line) + if not addrMatch: + return None + return hex(int(addrMatch.group().split(':')[-1],16)) + +def isThumb(bytes, line): + return len(bytes) == 4 or '.W' in line + +def getBytes(line): + bytes = re.search('(\s[0-9a-fA-F]{2}\s[0-9a-fA-F]{2})+',line) + if not bytes: + return None + bytesStr = bytes.group().replace(' ','').replace('\t','') + firstPair = bytesStr[2:4]+bytesStr[0:2] + secondPair = bytesStr[6:8]+bytesStr[4:6] if len(bytesStr) == 8 else '' + if '.W' in line: + return firstPair+secondPair + return secondPair+firstPair + + +def checkOnlyOnePush(group): + pushes = getPush(group) + #pushes = utils.searchPattern(pushPatter, [g.line for g in group]) + if len(pushes)!=1 or not checkBigRegs(pushes[0]): + return False + return True + + +def checkTheSameRegsForPushAndPops(group): + pushpops = dict((g.addr,re.search(pushPatter.pattern+'|'+popPattern.pattern,g.line)) + for g in group) + pushpops = dict((addr, line.group()) for addr, line in pushpops.items() if line) + + # parse regs + regsSampler = None + for addr, line in pushpops.items(): + regs = re.search('{.*}',line).group().lower()\ + .replace('{','')\ + .replace('}','')\ + .split(',') + lastReg = regs[-1] + if lastReg not in pushpopLastRegs: + return False + newRegs = [r for r in regs[:-1] if '-' not in r] + for reg in regs[:-1]: + if not '-' in reg: + continue + borders = reg.split('-') + for i in range(int(borders[0][1:]),int(borders[1][1:])+1): + newRegs.append('r{0}'.format(i)) + newRegs = sorted(newRegs) + if not regsSampler: + regsSampler = sorted(newRegs) + elif len(newRegs)!=len(regsSampler) or \ + any(newRegs[i] != regsSampler[i] for i in range(len(newRegs))): + return False + item = [g for g in group if g.addr == addr][0] + item.regs, item.reg = newRegs, lastReg + return group + + +def checkBigRegs(line): + big_regs = ['sp', 'ip', 'lr', 'pc', 'r12'] + return not any(big_regs[i] in line.line for i in range(len(big_regs))) + +def getRelativeRegs(group): + pattern = re.compile('.*(ldr|str).*\[.*\], ', re.IGNORECASE) + return [g for g in group if pattern.search(g.line)] + +def searchInLines(pattern, group): + return [g for g in group if pattern.search(g.line)] + #return utils.searchPattern(pattern, [g.line for g in group]) + +def searchPattern(pattern, line): + return pattern.search(line.line) + +def searchPatterns(pattern, lines): + return list(filter(None,[pattern.search(l.line) for l in lines])) + + + +def getNumber(line): + a = line.line + for key, value in [(key, value) for key, value in varsDict.items() if key[0]==line.funcAddr]: + a = a.replace(key[1], value) + number = re.search('#-?(0x)?[0-9a-f\-\+x]+', a, re.IGNORECASE) + if not number: + return 0 + try: + return ast.literal_eval(number.group()[1:]) + except: + return None + + + +def getVars(lines): + # pattern = re.compile('(ptr|buf|arg|varg|var)_?[0-9a-fr]+\s*=\s*-?(0x)?[a-f0-9]+\s',re.IGNORECASE) + pattern = re.compile('[a-z]+_?[0-9a-fr]*\s*=\s*-?(0x)?[a-f0-9]+\s',re.IGNORECASE) + vars = [l for l in lines if pattern.search(l)] + for var in vars: + items = ' '.join(var.split(' ')[1:]) + name = re.search('[a-z]+_?[0-9a-fr]*\s', items,re.IGNORECASE)\ + .group().strip() + addr = getAddress(var) + value = re.search('=\s*-?(0x)?[a-f0-9]+\s',items,re.IGNORECASE)\ + .group().replace('=',' ').strip() + varsDict[(addr, name)] = value + + + + + +def hadleExternalJumps(groups, conditions, funcAddrDict): + have_external_jumps = {} + ext_jumps_list = {} + external_jumps = [] + + + # убираем b, которые внутри функции + for index, group in enumerate(groups): + containsJumpsPattern = re.compile('\sb('+'|'.join(conditions)+')\s',re.IGNORECASE) + #clear = [not group[i][2].startswith('b') for i in range(len(group)) if len(group[i]) > 2] + containsJumps = searchInLines(containsJumpsPattern, group) + #if all([not containsJumpsPattern.search(g.line) for g in group]): + if len(containsJumps)==0: + continue + first, last = group[0], group[-1] + # if len(first) == 2: + # first = group[1] + if pushPatter.search(first.line): + #and last[2].startswith('pop') or last[2].startswith('ldmia'): + first_addr, last_addr = int(first.addr, 16), int(last.addr, 16) + has_ext_jumps = False + jumps = [] + for g in containsJumps: + if re.search('lr|r[0-9|10|11|12]',g.line): #todo ??? + continue + #if addr < first_addr or addr > last_addr: + # if index!=len(groups)-1: + # last_addr = int(groups[index+1][0][0],16) + #addr = int(g.addr,16) + addr = g.line[containsJumpsPattern.search(g.line).end():].strip().split(';')[0] + if addr in funcAddrDict: + addr = funcAddrDict[addr] + addr = re.sub('[a-z]+_','0x',addr) + addr = int(addr, 16) + #addr = int(addr.replace('[a-z]+_','0x'), 16) + if addr < first_addr or addr > last_addr: + has_ext_jumps = True + jumps.append(addr) + #break + if has_ext_jumps: + ext_jumps_list[index] = jumps + external_jumps.extend(jumps) + have_external_jumps[index] = group + + external_jumps = set(external_jumps) + external_jumps_res = {} + jumpFunc = {} + for jump in list(external_jumps): + try: + destinationFunc = [g for g in groups + if int(g[0].addr,16) <= jump + and int(g[-1].addr,16)>=jump][0] + except: + continue #todo + #for index, row in enumerate(lines): + destinationRow = [row for row in destinationFunc if row.addr == hex(jump)][0] + destinationIndex = destinationFunc.index(destinationRow) + # нашли строку, на которую jump + #проверяем, может прыгнули на push + # push_method = re.search('push|stmdb', destinationRow) + # if push_method is not None: + # external_jumps_res[jump] = 'push' + # continue + #идем вниз, ищем push/pop/b + jumpFunc[jump] = destinationFunc + for index,r in enumerate(destinationFunc[destinationIndex:]): + if pushPatter.search(r.line): + #push_method = re.search('push|stmdb', r) + #if push_method is not None: + #external_jumps.remove(jump) + external_jumps_res[jump] = 'push' + break + if popPattern.search(r.line): + #pop_method = re.search('pop|ldmia', r) + #if pop_method is not None: + # опасно! надо что-то сделать! + # либо не обрабатывать эту функцию и все, которые на нее ссылаются по addr + # либо связать их и обрабатывать вместе + external_jumps_res[jump] = 'pop' + break + #jump_method = re.search('\sb({0})?\s'.format(conditions_pattern), r) + #if jump_method is not None: + if index!=0 and containsJumpsPattern.search(r.line): + # опасно! надо что-то сделать! + # сделать рекурсию? + external_jumps_res[jump] = 'jump' + break + + for key, value in ext_jumps_list.items(): + for i in list(value): + if i in external_jumps_res.keys() and external_jumps_res[i] == 'push': + ext_jumps_list[key].remove(i) + + for key, value in ext_jumps_list.items(): + if len(value) == 0: + have_external_jumps.pop(key) + external_jumps_res = [addr for addr in external_jumps_res if external_jumps_res[addr] != 'push'] + # убираем те, которые в have_external_jumps + if len(have_external_jumps)==0: + return groups + gr = groups + groups = [] + removed_gr = 0 + #убираем функции с внешними прыжками и с фунциями, на которые прыгнули (если не push) + for index, f in enumerate(gr): + if index in have_external_jumps.keys(): + continue + nojumps = True + for jump in external_jumps_res: + if f==jumpFunc[jump]: + nojumps = False + if nojumps: + groups.append(index) + return groups + + + + # for index, group in enumerate(gr): + # # если jump в этой группе, то ее тоже не обрабатываем + # first_addr, last_addr = group[0][0], gr[index + 1][0][0] if index != len(gr) - 1 else 0xFFFFFFFF + # handle = True + # for jump in external_jumps_res: + # if int(jump, 16) >= int(first_addr, 16) and int(jump, 16) <= int(last_addr, 16): + # handle = False + # removed_gr += 1 + # break + # if handle and index not in have_external_jumps.keys(): + # # убираем b/beq/... + # group = [g for g in group if not g[2].startswith('b')] + # groups.append(group) + # return groups + + + + diff --git a/ObjdumpHelper.py b/ObjdumpHelper.py new file mode 100644 index 0000000..0169e8e --- /dev/null +++ b/ObjdumpHelper.py @@ -0,0 +1,148 @@ +import re, utils + + +def readLines(lines): + indices = [i for i, s in enumerate(lines) if '.text' in s] + lines = lines[indices[0]:] + return lines + + + +def getFunctions(lines): + stack_lines = [] + + index = 0 + # выбираем только строки с push/pop, разбираем их на составляющие + # Также ищем названия функций : + function_name = '' + functions = dict() # адрес - функция + for line in lines: + is_function_name = re.search('<.*>:', line) + if is_function_name is not None: + function_name = is_function_name.group()[1:-2] + address = utils.getAddressFromLine(line) + stack_lines.append((address,function_name)) + andeq = re.search('andeq', line) + if andeq is not None: + stack_lines.append((address, "andeq", "null")) + + #stack_line = re.match('.*((push(.w)?|stmdb(.w)?\s*sp!).*lr}|(pop[a-z]*|ldmia[a-z]*\s*sp!).*(pc|lr)}).*', line) + stack_line = re.match('.*((push|stmdb[a-z]*\s*sp!).*lr}|(pop|ldmia[a-z]*\s*sp!).*(pc|lr)}).*', line) + + branch_line = re.match('.*\s(bx|b)({0})?\s.*'.format(conditions_pattern), line) + if branch_line is not None: + address = utils.getAddressFromLine(line) + if address == '3db8': + aaa = 1 + code, is_thumb = utils.getCodeFromLine(line) + jumpto = re.search('\s(bx|b)({0})?\s[0-9a-z]+'.format(conditions_pattern),branch_line.group()).group().split()[-1] + method = re.search('\s(bx|b)({0})?'.format(conditions_pattern),branch_line.group()).group().strip() + stack_lines.append((address, code, method, jumpto, is_thumb, index)) + if stack_line is not None: + method = re.search('(push|stmdb|pop|ldmia)[a-z]*', line).group() + # берем все регистры в {} и убираем последний (это lr или pc) + # в дальнешем будем исключать строки, в которых есть регистры > r11 + #registers = re.search('{.*}', line).group().replace('}','').replace('{','').replace(' ','').split(',')[:-1] + registers = re.search('{.*}', line).group().replace('}','').replace('{','').replace(' ','').split(',') + last_reg = registers[-1] + #registers = re.findall("r11|r10|r[1-9]|sp", stack_line.group()) + # убираем лишний sp (sp!) + #if (method.startswith('stm') or method.startswith('ldm')) and 'sp' in registers: + # registers.remove('sp') + address = utils.getAddressFromLine(line) + code, is_thumb = utils.getCodeFromLine(line) + stack_lines.append((address, code, method, registers[:-1], is_thumb, index, last_reg)) + if re.search('pop(.w)?|ldmia(.w)?', method) is None: + functions[address] = function_name.split('@')[0] + function_name = '' + index += 1 + + # выделяем функции, для которых нет имени + if RETURN_TYPES: + noname_functions = dict((addr, func) for addr, func in functions.items() if func == '') + if len(noname_functions) > 0: + nonstatic_folder = config.get('nonstatic_app_directory') + nonstatic_file = os.path.join(nonstatic_folder, os.path.basename(path)+'.txt') + + newNames = dict((addr, func) for addr, func in noname_functions.items() if func!='') + + while True and os.path.exists(nonstatic_file): + noname_len = len(noname_functions) + for addr in list(noname_functions): + name = static_functions_helper.getName(lines, addr, nonstatic_file, newNames) + if name != '': + newNames[addr] = name + noname_functions.pop(addr) + if len(noname_functions) == noname_len: + break + + for addr in newNames: + functions[addr] = newNames[addr] + + return combineFunction(stack_lines), functions + + + +def combineFunction(stack_lines): + functions = [] + items = [] + pops = ['pop','pop.w','ldmia','ldmia.w'] + + def is_function_start(line): + return len(line)==2 or line[2].startswith('push') or line[2].startswith('stmdb') + + for index, line in enumerate(stack_lines): + if is_function_start(line): + if line[0]=='ac28': + aaa=1 + functions.append(items) + items = [] + items.append(line) + continue + + if (line[1]!='andeq'): + items.append(line) + + if (line[2] in pops and line[6]=='pc')\ + or (line[2]=='bx' and line[3]=='lr') or line[1]=='andeq': #предполагаемый конец функции + # посмотреть, есть ли прыжки дальше (до начала следующей функции) + # прыжки = b(eq/...) + # также конец функции только если неусловный pop и bx\ + + #находим следующий push/stmdb + ind, next_func_address = index+1, sys.maxsize + while True: + if ind >= len(stack_lines): + break + if is_function_start(stack_lines[ind]): + next_func_address = int(stack_lines[ind][0],16) + break + ind+=1 + + jumps, func_end = [], int(line[0],16) + for item in [i for i in items if len(i)>2]: + if item[2].startswith('b') and not re.match('lr|pc|(r[0-9]*)',item[3]): + jump_to = int(item[3],16) + if func_end < jump_to < next_func_address: + jumps.append(item[3]) + if len(jumps)==0: # если нет, то конец функции + functions.append(items) + items = [] + else: #todo содержит прыжки, посмотерть, выходят ли они за пределы pop pc до push + # если выходят, то не конец фнукции, идем дальше + jumps = [] + continue + + filtered = [] + for f in functions: + if len(f)<2 or len(f[0])<3 or not (f[0][2].startswith('push') or f[0][2].startswith('stmdb')): + continue + push_regs, success = f[0][3], True + for pop in f: + #убираем функции, в котрых регистры в push и pop разные + if (pop[2].startswith('pop') or pop[2].startswith('ldmia')) and pop[3]!=push_regs: + success = False + break + if success: + filtered.append(f) + return filtered diff --git a/arm_translate.py b/arm_translate.py index edb886b..2f4716f 100755 --- a/arm_translate.py +++ b/arm_translate.py @@ -53,7 +53,7 @@ def code(old_code, mask, s, is_thumb): return c[4:] + c[:4] def makeLdrOrStrInner(old_instr, old_code, rx, ry, a, is_thumb, l): # ldr rx, [ry + a] - + old_instr = old_instr.lower() s = a # 11-0 if old_instr.endswith('.w') \ diff --git a/main_switcher.py b/main_switcher.py new file mode 100755 index 0000000..a11db3b --- /dev/null +++ b/main_switcher.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- +import re, utils, arm_translate, parse,parse_functions_utils, static_functions_helper, config_parser, os +import cxxfilt, colored, sys +import switcher, codecs + +conditions = ['eq','ne','cs','hs','cc','lo','mi','pl','vs','vc','hi','ls','ge','lt','gt','le','al'] +conditions_pattern = '|'.join(conditions) + +NEW = True +RETURN_TYPES = False + +def findSpSubbed(groups): + containSpSubbedPattern = re.compile('\ssub\s+sp,',re.IGNORECASE) + matching_groups = [] + for group in groups: + matches = switcher.searchInLines(containSpSubbedPattern,group) + if len(matches)>0: + matching_groups.append(group) + return matching_groups + + +def findBxLR(groups): + containSpSubbedPattern = re.compile('bx\s+lr',re.IGNORECASE) + matching_groups = [] + for group in groups: + matches = switcher.searchInLines(containSpSubbedPattern,group) + if len(matches)>0: + matching_groups.append(group) + return matching_groups + + + + +def run(path, start_group, end_group, DEBUG, config): + print('DEBUG ', DEBUG) + f = codecs.open(path+'.txt', 'r','utf-8', errors="ignore") + lines = switcher.readLines(f) + f.close() + + groups, addrFuncDict = switcher.getFunctions(lines) + funcAddrDict = dict(zip(addrFuncDict.values(),addrFuncDict.keys())) + + g = switcher.handleExternalJumps(groups, conditions, funcAddrDict) + + #находим тип функций + function_types = [] + if RETURN_TYPES: + print('FUNCTIONS') + function_types = parse_functions_utils.getFunctionsReturnTypeSize(addrFuncDict, config) + #todo использовать числа при рандомизации + # all_registers = ['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'r11'] + + + #ищем push, записываем для него все pop с теми же регистрами, пока не встретим новый push + #или название функции <..> + i = 0 + # groups = [] + # lst = [] + # while i < len(stack_lines)-1: + # if len(stack_lines[i]) == 2: + # #lst.append(stack_lines[i]) + # i+=1 + # continue + # probably_push = str(stack_lines[i][2]) + # if probably_push.startswith('push') or probably_push.startswith('stmdb'): + # #lst = [stack_lines[i]] + # lst.append(stack_lines[i]) + # j = i + 1 + # while len(stack_lines[j]) > 2 \ + # and (str(stack_lines[j][2]).startswith("b") or ((str(stack_lines[j][2]).startswith('pop') or str(stack_lines[j][2]).startswith('ldmia')) \ + # and stack_lines[j][3] == stack_lines[i][3])): + # lst.append(stack_lines[j]) + # j += 1 + # if j >= len(stack_lines): + # break + # if j - i > 1: + # groups.append(lst.copy()) + # lst.clear() + # i = j + # lst.clear() + # else: + # i += 1 + init_group_len = len(groups) + + + + containSpSubbed = findSpSubbed(groups) + print('Groups with subbed sp:', len(containSpSubbed)) + + #difference = [g for g in groups if g not in f] + + #groups = switcher.hadleExternalJumps(groups) + + # фильтруем группы - убираем те, в которых последний pop нe pc + print ('GROUPS:', len(groups)) + + + print("Groups after jumps removing", len(groups)) + + # gr = groups + # groups = [] + # for group in gr: + # #if all(g[6]=='pc' for g in group[1:]): + # #берем только те функции, в которых нет pop lr + # #if len(group) > 1 and all(g[6]=='pc' for g in group[1:]): + # if len(group) > 1: + # groups.append(group) + # #if group[-1][6] == 'pc': + # # groups.append(group) + containBXLRbefore = findBxLR(groups) + + # check only one push + # the same regs for push and pops + groups = list(filter(None,[switcher.checkSuitable(g) for g in groups])) + + containBXLR = findBxLR(groups) + print('CONTAINS BX LR:',len(containBXLR)) + + #groups = [group for group in groups if group[i][6]=='pc' for i in range(1,len(group))] + print ('Functions with push-pop pairs', len(groups)) + + #добавляем в to_write (адрес, количество старых байт, новые байты) для перезаписи + groups_count = 0 + to_write = [] + l = 0 + full_registers_count = 0 + #1935-36 + print(start_group, ":", end_group) + + regs_added = 0 + handledGroups = [] + for group in groups[start_group:end_group]: # 66 libcrypto - pop lr => bl - перезапись регистров + #first, last = group[0], group[-1] + push, pops = switcher.getPushes(group)[0], switcher.getPops(group) + l+=1 + + # добавляем регистры в начало, считает их количество + real_reg_count = len(push.regs) + return_size = function_types[push.addr] if RETURN_TYPES else 4 + new_registers, table = utils.addRegistersToStartAndEnd\ + (push.regs, push.bytes, return_size) + if new_registers == -1: + full_registers_count+=1 + continue + # меняем втутренние строки, взаимодействующие с sp + inner_lines = parse.getAllSpLinesForLow(group, table) + + if inner_lines == -1: + continue + groups_count+=1 + handledGroups.append(group) + # добавляем в to_write (адрес, количество старых байт, новые байты) push + #print (first[0]) + + to_write.append((push.addr, len(push.bytes) // 2, + utils.toLittleEndian + (arm_translate.pushpopToCode + (new_registers, push.bytes, push.thumb, real_reg_count, False)))) # добавляем новый push + + # добавлаем все pop + for pop in pops: + to_write.append((pop.addr, len(pop.bytes) // 2, + utils.toLittleEndian( + arm_translate.pushpopToCode + (new_registers, pop.bytes, pop.thumb, real_reg_count, True)))) # добавляем новый pop + + if len(inner_lines) > 0: + to_write.extend(inner_lines) + funcAddr = group[0].addr + key = cxxfilt.demangle(addrFuncDict[funcAddr]) \ + if addrFuncDict[funcAddr]!='' else push.addr + # print(colored.setColored('{0}: '.format(key), colored.OKGREEN) + 'old {0}, new {1}'.format(push.regs, new_registers)) + regs_added += len(new_registers) - len(push.regs) + secured = groups_count/init_group_len*100 + # output = 'End:{0}, full regs:{1}, secured:{2}%, average randomness:{3}'\ + # .format(groups_count, full_registers_count, secured, regs_added/groups_count) + + output = 'End:{0}, full regs:{1}, secured:{2}%'\ + .format(groups_count, full_registers_count, secured) + if groups_count>0: + output += ", average randomness:{0}".format(regs_added/groups_count) + + colored.printColored(output, colored.BOLD) + + onlyForContainsSub = [item for item in containSpSubbed if item not in handledGroups] + onlyWithPushes = [item for item in handledGroups if item not in containSpSubbed] + output = 'Only for SUB_SP:{0}, only for PUSH:{1}, common: {2}'\ + .format(len(onlyForContainsSub), len(onlyWithPushes), len(handledGroups) - len(onlyWithPushes)) + colored.printColored(output, colored.BOLD) + + + #переписываем файл + f = open(path+'_old.so', 'br') + text = f.read() + f.close() + + for line in to_write: + offset = int(line[0],16) + text = text[:offset] + line[2] + text[offset+line[1]:] + + f = open(path+'.so', 'bw') + f.write(text) + f.close() + + + + + + + + + + + diff --git a/parse.py b/parse.py index f2e81ba..9c7b3fe 100755 --- a/parse.py +++ b/parse.py @@ -1,16 +1,19 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -import re, arm_translate, utils +import re, arm_translate, utils, switcher def getRxLines(lines, line, table, sp_subbed, sub_ind): #line = add_sp_to_reg[0].group() - c = re.search('#[0-9]+', line) - c = 0 if c is None else int(c.group()[1:]) - reg = re.search('r11|r10|r12|r[0-9]', line).group() + c = switcher.getNumber(line) + if c is None: + return -1 + #c = 0 if not c else int(c.group()[1:],16) if "0x" in c.group() else int(c.group()[1:]) + regsPattern = re.compile('r11|r10|r12|r[0-9]', re.IGNORECASE) + reg = switcher.searchPattern(regsPattern, line).group() #определяем строку, с которой будем искать строки вида [reg, #d] - start_ind = list(lines).index(line+'\n') + start_ind = list(lines).index(line) # предполагаем, что изменение sp происходит только в начале и конце функции todo # если sp еще не отнят, тогда не нужно учитывать a @@ -20,9 +23,13 @@ def getRxLines(lines, line, table, sp_subbed, sub_ind): # end_reg = list(filter(None, # [re.search('.*(mov(eq|ne)?|(v)?ldr(b|h|sb|sh|sw)?|add)(.w)?\s{0}, .*'.format(reg), line) for # line in lines[start_ind+1:]])) - end_reg = utils.searchRe('.*(mov(eq|ne)?|(v)?ldr(b|h|sb|sh|sw)?|add)(.w)?\s{0}, .*'.format(reg),lines[start_ind+1:]) + clearRegPattern = re.compile\ + ('.*(mov(eq|ne)?|(v)?ldr(b|h|sb|sh|sw)?|add)(.w)?\s{0},\s?.*'.format(reg), + re.IGNORECASE) + end_reg = switcher.searchInLines(clearRegPattern,lines[start_ind+1:]) + #определяем строку, ДО которой будем искать строки вида [reg, #d] (mov затирает sp) - end_ind = list(lines).index(end_reg[0].group()+'\n') if len(end_reg) > 0 else len(lines) + end_ind = list(lines).index(end_reg[0].group()) if len(end_reg) > 0 else len(lines) to_write = [] # Ищем строки вида [reg, #d] @@ -31,32 +38,41 @@ def getRxLines(lines, line, table, sp_subbed, sub_ind): # line in lines[start_ind:end_ind]])) # ... [rx] - use_reg = utils.searchRe('.*(ldr|str)(b|h|sb|sh|sw|d)?(.w)?.*\[{0}(, #\-?[0-9]+\])?'.format(reg), lines[start_ind:end_ind]) + useRegPattern = re.compile\ + ('.*(ldr|str)(b|h|sb|sh|sw|d)?(.w)?.*\[{0}(,\s?#\-?(0x)?[0-9a-f]+.*\])?'.format(reg), + re.IGNORECASE) + use_reg = switcher.searchInLines(useRegPattern, lines[start_ind:end_ind]) + d = 0 #todo если будет str rx, [sp, #] и уже добавлен в to_write по sp, будет перезаписано? for l in use_reg: - instr = re.search('v?(ldr|str)(b|h|sb|sh|d)?(.w)?', l.group()).group() - try_d = re.search('#-?[0-9]+', l.group()) - d = int(try_d.group().replace('#', '')) if try_d is not None else 0 + pattern = re.compile('v?(ldr|str)(b|h|sb|sh|d)?(.w)?',re.IGNORECASE) + instr = switcher.searchPattern(pattern, l).group() + d = switcher.getNumber(l) + if d is None: + return -1 + #try_d = re.search('#-?[0-9]+', l.group()) + #d = int(try_d.group().replace('#', '')) if try_d is not None else 0 # если d < 0 => если c-d<0 =>[reg, #d-new_regs_count*4] n = c + d - sp_subbed if n >= 0: # todo а что если будет sub rx, sp? - rx = re.search('(\s+r10|r11|r12|sp|lr|pc|r[0-9]|((d|s)(([1-2][0-9])|3[0-1]|[0-9]))),', - l.group()).group().strip().replace(',', '') - code, is_thumb = utils.getCodeFromLine(l.group()) + pattern = re.compile('(\s+r10|r11|r12|sp|lr|pc|r[0-9]|((d|s)(([1-2][0-9])|3[0-1]|[0-9]))),', re.IGNORECASE) + rx = switcher.searchPattern(pattern, l).group().strip().replace(',', '') + #code, is_thumb = utils.getCodeFromLine(l.group()) offset = changeOffset(n, d, table) #offset = d + before_reg_count * 4 #if n >= real_reg_count * 4: # offset += after_reg_count * 4 - new_instr_code = arm_translate.makeLdrOrStr(instr, code, rx, reg, offset, is_thumb, l.group()) + new_instr_code = arm_translate.makeLdrOrStr(instr, l.bytes, rx, reg, offset, l.thumb, l.line) # to_write ... [reg, #d-new_regs_count*4] - to_write.append((utils.getAddressFromLine(l.group()), len(code) // 2, utils.toLittleEndian(new_instr_code))) + to_write.append((l.addr, len(l.bytes) // 2, utils.toLittleEndian(new_instr_code))) # str rx, [...] - str_reg = list(filter(None,[re.search('.*str(b|h|sb|sh|sw|d)?(.w)?\s{0}.*'.format(reg), line)for line in lines[start_ind:end_ind]])) - if len(str_reg) > 0: + strRegPattern = re.compile('.*str(b|h|sb|sh|sw|d)?(.w)?\s{0}.*'.format(reg), re.IGNORECASE) + str_reg = switcher.searchInLines(strRegPattern, lines[start_ind:end_ind]) + if len(str_reg) > 0: return -1 return to_write @@ -65,69 +81,94 @@ def getAllSpLinesForLow(lines, table): to_write = [] # ldr/str/... rx, [sp], ry - не знаем значение ry, не можем сделать правильное смещение => не обрабатываем такие функции - register_relative = utils.searchRe('.*(ldr|str).*\[.*\], ', lines) + register_relative = switcher.getRelativeRegs(lines) if len(register_relative) > 0: return -1 # не обрабатываем функции, в которых есть дополнительные push - more_pushes = utils.searchRe('.*\d}', lines) + more_pushes = switcher.searchInLines(re.compile('.*\d}',re.IGNORECASE),lines) if len(more_pushes) > 0: return -1 + + # не обрабатываем функции, в которых есть дополнительные push + # more_pushes = utils.searchRe('.*\d}', lines) + # if len(more_pushes) > 0: + # return -1 + + # ищем строки sub (add) sp, #a => sub (add) sp, #a+new_regs_count*4 => to_write #sub_add_sp_lines = list(filter(None,[re.search('.*(add|sub)(.w)?\s*sp(, sp)?, #[0-9]+', line) for line in lines])) - sub_add_sp_lines = utils.searchRe('.*(add|sub)(.w)?\s*sp(, sp)?, #[0-9]+', lines) + addSubPattern = re.compile('.*(add|sub)(.w)?\s*sp(,\s?sp)?,\s?#[0-9]+', re.IGNORECASE) + #sub_add_sp_lines = utils.searchPattern(addSubPattern, lines) + sub_add_sp_lines = switcher.searchInLines(addSubPattern, lines) # если строки нет, выходим (потом подумать,как сделать) todo #if len(sub_add_sp_lines) < 2: #не нашли sub и add #return [] if len(sub_add_sp_lines) != 0: - try: - sub_ind = lines.index([s for s in lines if str(s).startswith(sub_add_sp_lines[0].group())][0]) - except: - sub_ind = lines.index(sub_add_sp_lines[0].group() + '\n') - a = int(sub_add_sp_lines[0].group().split('#')[-1]) + sub_ind = lines.index(sub_add_sp_lines[0]) + # pattern = re.compile('#(0x)?[0-9a-f]+',re.IGNORECASE) + # a = switcher.searchPattern(pattern, sub_add_sp_lines[0]).group()[1:] + # a = int(a,16) if '0x' in a else int(a) + a = switcher.getNumber(sub_add_sp_lines[0]) + if a is None: + return -1 + + #a = int(re.search('#(0x)?[0-9a-f]+',sub_add_sp_lines[0], re.IGNORECASE).group(), 16) else: a = 0 sub_ind = 0 #ищем строки вида [sp, #b] #use_sp_lines = list(filter(None,[re.search('.*(ldr|str)(b|h|sb|sh|d)?(.w)?.*\[sp, #[0-9]+\].*', line) for line in lines])) - use_sp_lines = utils.searchRe('.*(ldr|str)(b|h|sb|sh|d)?(.w)?.*\[sp, #[0-9]+\].*', lines) + useSpPattern = re.compile('.*(ldr|str)(b|h|sb|sh|d)?(.w)?.*\[sp,\s?#-?(0x)?[0-9a-f]+.*\].*',re.IGNORECASE) + use_sp_lines = switcher.searchInLines(useSpPattern, lines) #todo #for i in use_sp_lines: #print(i.group()) - if len([s for s in use_sp_lines if '!' in str(s.group())])>0: + if any(['!' in l.line for l in use_sp_lines]): + #if len([s for s in use_sp_lines if '!' in str(s.group())])>0: return -1 - - for l in use_sp_lines: - instr = re.search('v?(ldr|str)(b|h|sb|sh|d)?(.w)?',l.group()).group() - b = int(re.search('#[0-9]+', l.group()).group().replace('#','')) + LdrStrPattern = re.compile('v?(ldr|str)(b|h|sb|sh|d)?(.w)?',re.IGNORECASE) + instr = switcher.searchPattern(LdrStrPattern, l).group().lower() + b = switcher.getNumber(l) + if b is None: + return -1 + + #b = int(re.search('#[0-9]+', l.group()).group().replace('#','')) if b-a >= 0: - rx = re.search('(\s+r10|r11|r12|sp|lr|pc|r[0-9]|((d|s)(([1-2][0-9])|3[0-1]|[0-9]))),', l.group()).group().strip().replace(',','') - code, is_thumb = utils.getCodeFromLine(l.group()) + pattern = re.compile\ + ('(\s+r10|r11|r12|sp|lr|pc|r[0-9]|((d|s)(([1-2][0-9])|3[0-1]|[0-9]))),', re.IGNORECASE) + rx = switcher.searchPattern(pattern, l).group().strip().replace(',','') + #code, is_thumb = utils.getCodeFromLine(l.group()) offset = changeOffset(b-a, b, table) - new_instr_code = arm_translate.makeLdrOrStr(instr,code , rx, 'sp', offset, is_thumb, l.group()) + new_instr_code = arm_translate.\ + makeLdrOrStr(instr,l.bytes , rx, 'sp', offset, l.thumb, l.line) # to_write ... [sp, #b + new_regs_count*4] - to_write.append((utils.getAddressFromLine(l.group()), len(code) // 2, utils.toLittleEndian(new_instr_code))) + to_write.append((l.addr, + len(l.bytes) // 2, utils.toLittleEndian(new_instr_code))) #ищем строки вида add rx, sp, (#c) - должна быть одна ? todo #add_sp_to_reg = list(filter(None, [re.search('.*(add(.w)?|mov)\s*(r[0-9]|r10|r11|r12), sp(, #[1-9]+)?.*', line) for line in lines])) - add_sp_to_reg = utils.searchRe('.*(add(.w)?|mov)\s*(r[0-9]|r10|r11|r12), sp(, #[1-9]+)?.*', lines) + addSpToRegPattern = re.compile\ + ('.*(add(.w)?|mov)\s*(r[0-9]|r10|r11|r12),\s?sp(,\s?#[1-9]+)?.*', re.IGNORECASE) + add_sp_to_reg = switcher.searchInLines(addSpToRegPattern, lines) #todo #for i in add_sp_to_reg: # print(i.group()) - if len([s for s in add_sp_to_reg if '!' in str(s.group())])>0: + #if len([s for s in add_sp_to_reg if '!' in str(s.group())])>0: + if any(['!' in l.line for l in add_sp_to_reg]): return -1 if len(add_sp_to_reg) > 0: for l in add_sp_to_reg: - new = getRxLines(lines, l.group(), table, a, sub_ind) + new = getRxLines(lines, l, table, a, sub_ind) if new == -1: return -1 to_write.extend(new) diff --git a/switcher.py b/switcher.py new file mode 100644 index 0000000..e7951ca --- /dev/null +++ b/switcher.py @@ -0,0 +1,62 @@ +import IdaHelper, ObjdumpHelper, re +from recordclass import recordclass + +isIDA = True + +rowModel = recordclass('row', 'line addr bytes thumb regs reg funcName funcAddr') +rowModel.__new__.__defaults__=(None, None, None, None, None, None, None, None) + +def readLines(file): + lines = file.readlines() + if isIDA: + return IdaHelper.readLines(lines) + return ObjdumpHelper.readLines(lines) + +def searchInLines(pattern, group): + if isIDA: + return IdaHelper.searchInLines(pattern, group) + +def searchPattern(pattern, line): + if isIDA: + return IdaHelper.searchPattern(pattern, line) + +def getFunctions(lines): + if isIDA: + return IdaHelper.getFunctions(lines) + + +def getPushes(group): + if isIDA: + return IdaHelper.getPush(group) + +def getPops(group): + if isIDA: + return IdaHelper.getPops(group) + + +def checkSuitable(group): + if isIDA: + if not IdaHelper.checkOnlyOnePush(group): + return [] + return IdaHelper.checkTheSameRegsForPushAndPops(group) + +def getRelativeRegs(group): + if isIDA: + return IdaHelper.getRelativeRegs(group) + +def getNumber(line): + if isIDA: + return IdaHelper.getNumber(line) + +def getVars(lines): + if isIDA: + return IdaHelper.getVars(lines) + + +def handleExternalJumps(groups, conditions, funcAddrDict): + if isIDA: + return IdaHelper.hadleExternalJumps(groups, conditions,funcAddrDict) + + + + diff --git a/utils.py b/utils.py index 62f12c8..49e5d61 100755 --- a/utils.py +++ b/utils.py @@ -114,10 +114,11 @@ def getAddressFromLine(line): def getCodeFromLine(line): code = str(line.split('\t')[1].replace('\t', '')).strip() - if len(code)!=4 and len(code)!=9: - i = 1 return code.replace(' ', ''), len(code) in [4, 9] def searchRe(regex, lines): - return list(filter(None, [re.search(regex, line) for line in lines])) \ No newline at end of file + return list(filter(None, [re.search(regex, line) for line in lines])) + +def searchPattern(pattern, lines): + return list(filter(None, [pattern.search(line) for line in lines]))