import re
def read_file(file_name):
    """Return every line of the text file ``file_name`` as a list of strings.

    The lines are returned exactly as ``readlines`` yields them, so each one
    keeps its line terminator when the file has one.
    """
    # The context manager closes the handle even when reading raises.
    with open(file_name, 'r') as f:
        return f.readlines()
def write_file(file_name, lines):
    """Write the strings in ``lines`` to ``file_name``, replacing its content.

    No separators are added between the items; each string is written as is.
    Returns None.
    """
    # The context manager flushes and closes the handle even when a write fails.
    with open(file_name, 'w') as f:
        f.writelines(lines)
def has_substring(s, s_list):
    """Tell whether ``s`` contains the given text.

    ``s_list`` is either one string, which must occur in ``s``, or an iterable
    of strings, of which at least one must occur in ``s``.
    Returns True or False; an empty iterable gives False.
    """
    if isinstance(s_list, str):
        return s_list in s
    return any(candidate in s for candidate in s_list)
def has_consecutive_substring(lines, i, s_list):
    """Check that the lines starting at index ``i`` match ``s_list`` in order.

    ``lines[i + k]`` is tested against ``s_list[k]`` with ``has_substring``,
    so each entry of ``s_list`` may be a string or a list of alternatives.
    Checking stops at the first line that does not match.

    Raises IndexError when the window runs past the end of ``lines`` before
    a mismatch has been found.
    """
    return all(
        has_substring(lines[i + offset], wanted)
        for offset, wanted in enumerate(s_list)
    )
def extract_imm_value(s):  # type:(str) -> int
    """Return the first hexadecimal immediate operand found in ``s``.

    The immediate is the run of hex digits that follows the first ``,0x``:
        xxx reg,0x12345678
        -> return 0x12345678

    The digits are matched directly, so the result does not depend on what
    follows them (a space, a newline, or the end of the string).

    Raises ValueError when ``s`` holds no ``,0x`` immediate, rather than
    parsing unrelated text from the line.
    """
    m = re.search(',0x([0-9A-Fa-f]+)', s)
    if m is None:
        raise ValueError('no ",0x" immediate operand in %r' % (s,))
    return int(m.group(1), 16)
def hex_with_sign(v):  # type:(int) -> str
    """Format ``v`` as a signed hex string, treating it as a 32-bit value.

        0x12345678 -> '+0x12345678'
        0xFFFFFFF8 -> '-0x8'

    Values of 0x80000000 and above are shown as the negative number
    ``v - 0x100000000``; everything else gets a '+' prefix.
    """
    if v >= 0x80000000:
        sign, magnitude = '-', 0x100000000 - v
    else:
        sign, magnitude = '+', v
    # Remove any 'L' from the hex() text.
    return sign + hex(magnitude).replace('L', '')
def hex_without_sign(v):  # type:(int) -> str
    """Format ``v`` as a plain hex string.

        0x12345678 -> '0x12345678'
    """
    # Remove any 'L' from the hex() text.
    return hex(v).replace('L', '')
def evaluate_ss_edi_expr(s, edi):  # type:(str,int) -> str
    """Replace an ``[ebp+edi...]`` operand in ``s`` with a constant offset.

    ``edi`` is the known value of the register. Supported operand forms:
        1. xxx reg,dword ptr ss:[ebp+edi]
        2. xxx reg,dword ptr ss:[ebp+edi+0x4BF7590F]
        3. xxx reg,dword ptr ss:[ebp+edi*2+0xDB8A93CE]
        4. xxx reg,dword ptr ss:[ebp+edi*2]
        -> xxx reg,dword ptr ss:[ebp+imm]

    The offset ``edi * scale + displacement`` is reduced modulo 2**32 and
    rendered with ``hex_with_sign``. Only the matched operand is rewritten;
    the rest of the line is returned unchanged.

    Raises ValueError when ``s`` contains none of the supported forms.
    """
    # One pattern covers all forms: the scale and the displacement are both
    # optional. Explicit hex-digit classes keep the match from running on to
    # a later ']' on the same line.
    m = re.search(
        r'\[ebp\+edi(?:\*(?:0x)?([0-9A-Fa-f]+))?(?:\+0x([0-9A-Fa-f]+))?\]', s)
    if m is None:
        raise ValueError('unsupported ebp+edi operand in %r' % (s,))
    scale = int(m.group(1), 16) if m.group(1) else 1
    displacement = int(m.group(2), 16) if m.group(2) else 0
    offset = (edi * scale + displacement) % 0x100000000
    return s[:m.start()] + '[ebp%s]' % hex_with_sign(offset) + s[m.end():]
def pass_nops(lines):
    """Drop lines this script treats as no-ops and return the remaining ones.

    A line is dropped when it contains any of the substrings listed in
    ``skipped_tokens`` anywhere in its text, or when it matches one of:
        or exx,0x0
        and exx,0xFFFFFFFF
    In every line that is kept, the text 'transfor.' is replaced by '0x'.
    """
    skipped_tokens = ('test', 'cld', 'cmp', 'nop', 'cmc', 'bt', 'clc', 'stc')
    kept = []
    for line in lines:
        if any(token in line for token in skipped_tokens):
            continue
        # or exx,0x0
        if re.search('or e..,0x0', line):
            continue
        # and exx,0xFFFFFFFF
        if re.search('and e..,0xFFFFFFFF', line):
            continue
        kept.append(line.replace('transfor.', '0x'))
    return kept
def pass_edi(lines):
    """Fold a constant built up in ``edi`` into the instruction that reads it.

        mov edi,0x...
        add edi,0x...      (one or more add/sub with an immediate)
        ...
        xxx reg,edi
        -> xxx reg,imm

    The immediates of the mov/add/sub run are combined modulo 2**32. The
    line after the run must contain ',edi'; its ',edi ' text (including the
    single space that follows) is replaced by the constant and the run is
    dropped. A ``mov edi,0x...`` that is not followed by at least one add/sub
    is left alone.

    Returns a new list; ``lines`` is not modified.
    """
    new_lines = []
    count = len(lines)
    i = 0
    while i < count:
        line = lines[i]
        if has_substring(line, 'mov edi,0x'):
            # Find the first line after the add/sub run. Stop at the end of
            # the trace rather than indexing past it, and only accept
            # add/sub lines that carry an immediate, since every line of the
            # run is fed to extract_imm_value below.
            use = i + 1
            while use < count and has_substring(
                    lines[use], ['sub edi,0x', 'add edi,0x']):
                use += 1
            if (use > i + 1 and use < count
                    and has_substring(lines[use], ',edi')):
                edi = 0
                for z in range(i, use):
                    v = extract_imm_value(lines[z])
                    if has_substring(lines[z], 'sub edi'):
                        edi -= v
                    else:
                        edi += v
                edi %= 0x100000000
                folded = lines[use].replace(
                    ',edi ', ',%s' % hex_without_sign(edi))
                # Only drop the edi setup when the operand was really
                # replaced; otherwise the line would still read edi.
                if folded != lines[use]:
                    new_lines.append(folded)
                    i = use + 1
                    continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_ebp(lines):
    """Fold a constant built up in ``edi`` into an ``[ebp+edi...]`` operand.

        mov edi,0x...
        add edi,0x...      (one or more add/sub with an immediate)
        ...
        xxx reg,[ebp+edi...]
        -> xxx reg,[ebp+imm]

    The immediates of the mov/add/sub run are combined modulo 2**32 and the
    line after the run is rewritten by ``evaluate_ss_edi_expr``; the run
    itself is dropped. A ``mov edi,0x...`` that is not followed by at least
    one add/sub is left alone.

    Returns a new list; ``lines`` is not modified. Errors raised by
    ``evaluate_ss_edi_expr`` for operand forms it does not accept propagate.
    """
    new_lines = []
    count = len(lines)
    i = 0
    while i < count:
        line = lines[i]
        if has_substring(line, 'mov edi,0x'):
            # Find the first line after the add/sub run. Stop at the end of
            # the trace rather than indexing past it, and only accept
            # add/sub lines that carry an immediate, since every line of the
            # run is fed to extract_imm_value below.
            use = i + 1
            while use < count and has_substring(
                    lines[use], ['sub edi,0x', 'add edi,0x']):
                use += 1
            if (use > i + 1 and use < count
                    and has_substring(lines[use], 'ebp+edi')):
                edi = 0
                for z in range(i, use):
                    v = extract_imm_value(lines[z])
                    if has_substring(lines[z], 'sub edi'):
                        edi -= v
                    else:
                        edi += v
                edi %= 0x100000000
                new_lines.append(evaluate_ss_edi_expr(lines[use], edi))
                i = use + 1
                continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_simplify_same_op(lines, op_name, reg_name):
    """Merge runs of ``op_name reg_name,0x...`` lines into a single line.

        xxx reg,imm1
        xxx reg,imm2
        ...
        -> xxx reg,imm

    The immediates of a run of two or more matching lines are summed modulo
    2**32. When ``op_name`` starts with 'ro' (ror/rol) the sum is further
    reduced modulo 32. The run is replaced by one line with the address
    field '00000000', or by nothing when the combined value is 0. A single
    matching line is kept unchanged.

    Returns a new list; ``lines`` is not modified.
    """
    pat = '%s %s,0x' % (op_name, reg_name)
    is_rotate = op_name.startswith('ro')
    new_lines = []
    count = len(lines)
    i = 0
    while i < count:
        line = lines[i]
        if has_substring(line, pat):
            # Extend the run, stopping at the end of the trace so a match on
            # the last line does not index past the list.
            end = i + 1
            while end < count and has_substring(lines[end], pat):
                end += 1
            if end - i > 1:
                v = sum(extract_imm_value(lines[z]) for z in range(i, end))
                v %= 0x100000000
                if is_rotate:
                    # ror/rol
                    v %= 32
                if v != 0:
                    new_lines.append('00000000 %s %s,%s\n' % (
                        op_name, reg_name, hex_without_sign(v)))
                i = end
                continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_simplify_same_op_all(lines):
    """Run ``pass_simplify_same_op`` for a fixed list of register/op pairs.

    The passes run in the order given by ``schedule``: register by register,
    and for each register the listed operations from left to right.
    Returns the resulting list of lines.
    """
    schedule = (
        ('esi', ('add', 'sub')),
        ('edi', ('add', 'sub')),
        ('eax', ('add', 'sub', 'ror')),
        ('ebx', ('add', 'sub', 'ror')),
        ('ecx', ('add', 'sub', 'ror')),
        ('edx', ('add', 'sub', 'ror')),
        ('esp', ('ror',)),
    )
    for reg_name, op_names in schedule:
        for op_name in op_names:
            lines = pass_simplify_same_op(lines, op_name, reg_name)
    return lines
def pass_push_pop_eax_ebx_ecx(lines):
    """Remove blocks bracketed by a push/pop of eax, ebx and ecx.

        push eax
        push ebx
        push ecx
        ...
        pop ecx
        pop ebx
        pop eax
        -> removed

    Everything from the three pushes through the first following triple of
    pops is dropped, including the lines in between. A push triple with no
    matching pop triple later in the trace is kept unchanged.

    Returns a new list; ``lines`` is not modified.
    """
    pushes = ['push eax', 'push ebx', 'push ecx']
    pops = ['pop ecx', 'pop ebx', 'pop eax']
    new_lines = []
    count = len(lines)
    i = 0
    while i < count:
        # Each window is checked to fit inside the trace before it is
        # inspected, so a triple near the end cannot index past the list.
        if (i + len(pushes) <= count
                and has_consecutive_substring(lines, i, pushes)):
            j = i + len(pushes)
            while (j + len(pops) <= count
                   and not has_consecutive_substring(lines, j, pops)):
                j += 1
            if j + len(pops) <= count:
                # Matching pops found: skip the whole bracketed block.
                i = j + len(pops)
                continue
        new_lines.append(lines[i])
        i += 1
    return new_lines
def pass_rol_to_ror(lines, reg_name):
    """Rewrite rotates of ``reg_name`` as ``ror`` lines with a hex count.

        rol reg,1    -> ror reg,0x1f
        ror reg,1    -> ror reg,0x1
        rol reg,0x.. -> ror reg,(32 - count % 32)

    The patterns are tested in that order as plain substrings of the line.
    Rewritten lines get the address field '00000000'; all other lines are
    copied through. A ``rol`` whose count is a multiple of 32 is emitted as
    ``ror reg,0x20``.

    Returns a new list; ``lines`` is not modified.
    """
    rol_by_one = 'rol %s,1' % reg_name
    ror_by_one = 'ror %s,1' % reg_name
    rol_by_imm = 'rol %s,0x' % reg_name
    template = '00000000 %s %s,%s\n'
    rewritten = []
    for line in lines:
        if has_substring(line, rol_by_one):
            amount = 31
        elif has_substring(line, ror_by_one):
            amount = 1
        elif has_substring(line, rol_by_imm):
            # Rotating left by n is rotating right by 32 - n.
            amount = 32 - extract_imm_value(line) % 32
        else:
            rewritten.append(line)
            continue
        rewritten.append(template % ('ror', reg_name, hex_without_sign(amount)))
    return rewritten
def pass_rol_to_ror_all(lines):
    """Run ``pass_rol_to_ror`` for eax, ebx, ecx, edx and esp, in that order."""
    for reg_name in ('eax', 'ebx', 'ecx', 'edx', 'esp'):
        lines = pass_rol_to_ror(lines, reg_name)
    return lines
def pass_unused_before_reassign(lines, reg_name):
    """Drop lines that write ``reg_name`` when it is reloaded before any use.

        sub ecx,eax      (-> removed)
        ...
        mov ecx,edx

    A line is a candidate when it contains ' <reg>,'. The following lines
    are then scanned in order, and the scan stops at the first line that
    either mentions the register in a way treated as a use (candidate kept)
    or contains 'mov <reg>,' (candidate dropped). When the scan reaches the
    end of the trace without a verdict, the candidate is kept.

    Returns a new list; ``lines`` is not modified.
    """
    write_pat = ' %s,' % reg_name
    middle_operand_pat = ',%s,' % reg_name
    reload_pat = 'mov %s,' % reg_name
    total = len(lines)
    kept = []
    for idx, line in enumerate(lines):
        dead = False
        if write_pat in line:
            for ahead in range(idx + 1, total):
                later = lines[ahead]
                if middle_operand_pat in later:
                    # imul xxx,reg,0x0
                    break
                comma = later.find(',')
                if comma == -1:
                    if reg_name in later:
                        # single-operand line that names the register
                        break
                    # push xxx
                    continue
                if later.find(reg_name, comma) != -1:
                    # mov xxx,reg / mov xxx,[reg]
                    break
                if (later.find('[', 0, comma) != -1
                        and later.find(reg_name, 0, comma) != -1):
                    # mov [xxx+ecx],xxx
                    break
                if reload_pat in later:
                    dead = True
                    break
        if not dead:
            kept.append(line)
    return kept
def pass_unused_before_reassign_all(lines):
    """Run ``pass_unused_before_reassign`` for ecx, edx, eax, ebx and edi, in that order."""
    for reg_name in ('ecx', 'edx', 'eax', 'ebx', 'edi'):
        lines = pass_unused_before_reassign(lines, reg_name)
    return lines
def pass_ror_to_shift(lines, reg_name):
    """Turn a rotate followed by a matching mask into a plain shift.

        ror edx,0x8
        and edx,0xFFFFFF
        -> shr edx,0x8

        ror ecx,0x1c
        and ecx,0xFFFFFFF0
        -> shl ecx,0x4

    The pair is replaced only when the mask keeps exactly the low
    ``32 - count`` bits (shr) or exactly the high ``count`` bits (shl).
    The rotate count is taken modulo 32; a count of 0 is left untouched.
    Replacement lines get the address field '00000000'.

    Returns a new list; ``lines`` is not modified.
    """
    ror_pat = 'ror %s,0x' % reg_name
    and_pat = 'and %s,0x' % reg_name
    new_lines = []
    count = len(lines)
    i = 0
    while i < count:
        line = lines[i]
        # Look at the following line only when there is one.
        if (i + 1 < count and has_substring(line, ror_pat)
                and has_substring(lines[i + 1], and_pat)):
            # Reduce the count first so the bit shifts below never see a
            # negative amount. NOTE(review): assumes x86 masks 32-bit rotate
            # counts to five bits - confirm against the CPU manual.
            shift = extract_imm_value(line) % 32
            mask = extract_imm_value(lines[i + 1])
            if shift != 0:
                rmask = (1 << (32 - shift)) - 1
                lmask = 0xFFFFFFFF - rmask
                if mask == rmask:
                    new_lines.append('00000000 shr %s,%s\n' % (
                        reg_name, hex_without_sign(shift)))
                    i += 2
                    continue
                if mask == lmask:
                    new_lines.append('00000000 shl %s,%s\n' % (
                        reg_name, hex_without_sign(32 - shift)))
                    i += 2
                    continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_ror_to_shift_all(lines):
    """Run ``pass_ror_to_shift`` for eax, ebx, ecx and edx, in that order."""
    for reg_name in ('eax', 'ebx', 'ecx', 'edx'):
        lines = pass_ror_to_shift(lines, reg_name)
    return lines
def test():
    """Read 'trace.txt', run the simplification passes, write 'trace_new.txt'.

    The passes run in the fixed order listed in ``stages``, followed by five
    rounds of ``pass_unused_before_reassign_all`` and one run of
    ``pass_ror_to_shift_all``. Returns None.
    """
    simplify = pass_simplify_same_op_all
    stages = (
        pass_nops,
        pass_rol_to_ror_all,
        pass_edi,
        pass_ebp,
        simplify,
        pass_push_pop_eax_ebx_ecx,
        simplify,
        pass_rol_to_ror_all,
        simplify,
        pass_push_pop_eax_ebx_ecx,
        pass_ebp,
        pass_edi,
        simplify,
        simplify,
    )
    lines = read_file('trace.txt')
    for stage in stages:
        lines = stage(lines)
    for _ in range(5):
        lines = pass_unused_before_reassign_all(lines)
    # NOTE(review): the shift pass is assumed to run once, after the loop
    # above rather than inside it - confirm against the intended pipeline.
    lines = pass_ror_to_shift_all(lines)
    write_file('trace_new.txt', lines)
# Run the whole pipeline as soon as this file is executed or imported.
test()