import
re
def read_file(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to read (opened in text mode, ``'r'``).

    Returns:
        A list with one string per line, line endings included, exactly as
        produced by ``readlines()``.
    """
    # The context manager closes the handle even when reading raises.
    with open(file_name, 'r') as f:
        return f.readlines()
def write_file(file_name, lines):
    """Write an iterable of strings to a text file, replacing its content.

    Args:
        file_name: Path of the file to create or truncate (mode ``'w'``).
        lines: Strings to write back to back; no line endings are added.

    Returns:
        None.
    """
    # The context manager flushes and closes the handle even when a write
    # raises part-way through.
    with open(file_name, 'w') as f:
        f.writelines(lines)
    return
def has_substring(s, s_list):
    """Tell whether ``s`` contains a given substring, or any of several.

    Args:
        s: The string to search in.
        s_list: Either a single string, or an iterable of strings.

    Returns:
        True if ``s_list`` is a string occurring in ``s``, or if at least
        one element of ``s_list`` occurs in ``s``; False otherwise (also for
        an empty iterable).
    """
    # A plain string is a single needle, not an iterable of characters.
    if isinstance(s_list, str):
        return s_list in s
    return any(needle in s for needle in s_list)
def has_consecutive_substring(lines, i, s_list):
    """Check that consecutive lines starting at ``i`` match ``s_list`` in order.

    Element ``k`` of ``s_list`` is tested against ``lines[i + k]`` with
    ``has_substring``, so each element may be a string or a list of
    alternative strings.

    Args:
        lines: Sequence of text lines.
        i: Index of the first line of the window.
        s_list: Sequence of patterns, one per consecutive line.

    Returns:
        True if every pattern matches its line, False at the first mismatch.

    Raises:
        IndexError: If the window runs past the end of ``lines`` before a
            mismatch is found. Callers are responsible for bounds.
    """
    return all(
        has_substring(lines[i + offset], wanted)
        for offset, wanted in enumerate(s_list)
    )
def extract_imm_value(s):
    """Parse the hexadecimal immediate that follows ``,0x`` in a line.

    The immediate is taken from just after the first ``,0x`` up to the next
    space, or up to the end of the line when no space follows.

    Args:
        s: One line of the trace, e.g. ``'00000000 add esi,0x10\\n'``.

    Returns:
        The immediate as an int.

    Raises:
        ValueError: If the line has no ``,0x`` marker, or the text after it
            is not valid hexadecimal.
    """
    marker = ',0x'
    pos = s.find(marker)
    if pos == -1:
        # Without this check the slice below would start at index 2 and
        # silently parse unrelated text (e.g. the address column).
        raise ValueError('no %r immediate in line: %r' % (marker, s))
    start = pos + len(marker)
    end = s.find(' ', start + 1)
    # When no space follows, the immediate runs to the end of the line.
    # Slicing with end == -1 would drop the last character, which is only
    # harmless when that character happens to be a newline.
    digits = s[start:] if end == -1 else s[start:end]
    # int() ignores surrounding whitespace, so a trailing newline is fine.
    return int(digits, 16)
def hex_with_sign(v):
    """Format ``v`` as an explicitly signed hex string.

    Values of ``0x80000000`` and above are shown as ``'-'`` followed by
    ``0x100000000 - v`` in hex; smaller values are shown as ``'+'`` followed
    by ``v`` in hex.

    Args:
        v: Integer to format.

    Returns:
        A string such as ``'+0x10'`` or ``'-0x10'``, with any ``'L'``
        characters removed.
    """
    if v >= 0x80000000:
        sign, magnitude = '-', 0x100000000 - v
    else:
        sign, magnitude = '+', v
    # Removing 'L' mirrors the original code (hex() of a Python 2 long
    # carries an 'L' suffix); it is a no-op on Python 3.
    return (sign + hex(magnitude)).replace('L', '')
def hex_without_sign(v):
    """Format ``v`` with ``hex()`` and strip any ``'L'`` characters.

    Args:
        v: Integer to format.

    Returns:
        A string such as ``'0x1f'``.
    """
    # Removing 'L' mirrors the original code (hex() of a Python 2 long
    # carries an 'L' suffix); it is a no-op on Python 3.
    return hex(v).replace('L', '')
def evaluate_ss_edi_expr(s, edi):
    """Replace an ``[ebp+edi...]`` operand by ``[ebp+/-0xNN]`` for a known edi.

    Three operand shapes are recognised:

    * ``[ebp+edi+0xD]``   -> offset ``edi + D``
    * ``[ebp+edi*S+0xD]`` -> offset ``edi * S + D`` (``S`` parsed as hex)
    * ``[ebp+edi]``       -> offset ``edi``

    The offset is reduced modulo 2**32 and rendered with ``hex_with_sign``.

    Args:
        s: One line of the trace containing the operand.
        edi: Integer value of edi at this point.

    Returns:
        ``s`` with the recognised operand replaced.

    Raises:
        ValueError: If ``s`` contains none of the supported operand shapes,
            or the operand is malformed. ``ValueError`` derives from
            ``Exception``, which is what this function raised before.
    """
    if '[ebp+edi+' in s:
        m = re.search(r'\[ebp\+edi\+0x([0-9A-Fa-f]+)\]', s)
        if m is None:
            raise ValueError('malformed [ebp+edi+0x..] operand in line: %r' % s)
        edi += int(m.group(1), 16)
    elif '[ebp+edi*' in s:
        m = re.search(r'\[ebp\+edi\*([^+\]]+)\+0x([0-9A-Fa-f]+)\]', s)
        if m is None:
            raise ValueError('malformed [ebp+edi*..+0x..] operand in line: %r' % s)
        edi = edi * int(m.group(1), 16) + int(m.group(2), 16)
    else:
        m = re.search(r'\[ebp\+edi\]', s)
        if m is None:
            raise ValueError('no supported [ebp+edi...] operand in line: %r' % s)
    edi %= 0x100000000
    operand = '[ebp%s]' % hex_with_sign(edi)
    # Splice over exactly the operand that was parsed, so any other bracketed
    # text on the line (e.g. in a trailing comment) is left alone.
    return s[:m.start()] + operand + s[m.end():]
def pass_nops(lines):
    """Filter out lines this pass treats as no-ops and normalise a prefix.

    A line is dropped when it contains any of the substrings ``test``,
    ``cld``, ``cmp``, ``nop``, ``cmc``, ``bt``, ``clc``, ``stc``, or when it
    matches the regex ``or e..,0x0`` or ``and e..,0xFFFFFFFF``. In every kept
    line the text ``transfor.`` is replaced by ``0x``.

    NOTE(review): all checks are unanchored substring/regex matches, so e.g.
    ``bt`` also hits ``bts`` and ``or e..,0x0`` also hits ``xor`` and any
    immediate that merely starts with ``0``. Left as is.
    NOTE(review): ``transfor.`` is presumably a module-name prefix emitted by
    the disassembler - confirm against the trace format.

    Args:
        lines: List of trace lines.

    Returns:
        A new list with the surviving, normalised lines.
    """
    dropped_mnemonics = ['test', 'cld', 'cmp', 'nop', 'cmc', 'bt', 'clc', 'stc']
    kept = []
    for text in lines:
        if (
            has_substring(text, dropped_mnemonics)
            or re.search('or e..,0x0', text) is not None
            or re.search('and e..,0xFFFFFFFF', text) is not None
        ):
            continue
        kept.append(text.replace('transfor.', '0x'))
    return kept
def pass_edi(lines):
    """Fold ``mov edi,0x..`` plus immediate add/sub into the following use.

    Looks for a ``mov edi,0x..`` line followed by one or more
    ``add edi,0x..`` / ``sub edi,0x..`` lines and then a line that uses
    ``,edi `` as an operand. The whole group is replaced by that last line
    with ``,edi `` rewritten to the computed constant (modulo 2**32).

    Args:
        lines: List of trace lines.

    Returns:
        A new list of lines with the recognised groups folded.
    """
    # Only immediate forms can be folded; ``add edi,eax`` ends the run.
    step_pats = ['sub edi,0x', 'add edi,0x']
    total = len(lines)
    new_lines = []
    i = 0
    while i < total:
        line = lines[i]
        if has_substring(line, 'mov edi,0x'):
            # Count the adjustments right after the mov, never reading past
            # the end of the trace.
            k = 0
            while i + k + 1 < total and has_substring(lines[i + k + 1], step_pats):
                k += 1
            use = i + k + 1
            # Require ',edi ' (with the trailing space) because that is the
            # exact text substituted below; otherwise the defining lines
            # would be dropped without the constant being inserted.
            if k > 0 and use < total and has_substring(lines[use], ',edi '):
                edi = 0
                for z in range(i, use):
                    v = extract_imm_value(lines[z])
                    if has_substring(lines[z], 'sub edi,0x'):
                        edi -= v
                    else:
                        edi += v
                edi %= 0x100000000
                new_lines.append(
                    lines[use].replace(',edi ', ',%s' % hex_without_sign(edi))
                )
                i = use + 1
                continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_ebp(lines):
    """Fold ``mov edi,0x..`` plus immediate add/sub into an ``ebp+edi`` operand.

    Looks for a ``mov edi,0x..`` line followed by one or more
    ``add edi,0x..`` / ``sub edi,0x..`` lines and then a line containing
    ``ebp+edi``. The whole group is replaced by that last line, rewritten by
    ``evaluate_ss_edi_expr`` with the computed edi value (modulo 2**32).

    Args:
        lines: List of trace lines.

    Returns:
        A new list of lines with the recognised groups folded.

    Raises:
        Whatever ``evaluate_ss_edi_expr`` raises for an operand shape it does
        not support.
    """
    # Only immediate forms can be folded; ``add edi,eax`` ends the run.
    step_pats = ['sub edi,0x', 'add edi,0x']
    total = len(lines)
    new_lines = []
    i = 0
    while i < total:
        line = lines[i]
        if has_substring(line, 'mov edi,0x'):
            # Count the adjustments right after the mov, never reading past
            # the end of the trace.
            k = 0
            while i + k + 1 < total and has_substring(lines[i + k + 1], step_pats):
                k += 1
            use = i + k + 1
            if k > 0 and use < total and has_substring(lines[use], 'ebp+edi'):
                edi = 0
                for z in range(i, use):
                    v = extract_imm_value(lines[z])
                    if has_substring(lines[z], 'sub edi,0x'):
                        edi -= v
                    else:
                        edi += v
                edi %= 0x100000000
                new_lines.append(evaluate_ss_edi_expr(lines[use], edi))
                i = use + 1
                continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_simplify_same_op(lines, op_name, reg_name):
    """Merge runs of the same ``<op> <reg>,0x..`` instruction into one line.

    Two or more consecutive lines containing ``'<op_name> <reg_name>,0x'``
    are replaced by a single synthetic line whose immediate is the sum of
    the run, modulo 2**32 (and additionally modulo 32 when ``op_name``
    starts with ``'ro'``). If the merged immediate is 0 the run is removed
    altogether. A single occurrence is left untouched.

    Args:
        lines: List of trace lines.
        op_name: Mnemonic, e.g. ``'add'`` or ``'ror'``.
        reg_name: Register name, e.g. ``'eax'``.

    Returns:
        A new list of lines with the runs merged.
    """
    pat = '%s %s,0x' % (op_name, reg_name)
    is_rotate = op_name.startswith('ro')
    total = len(lines)
    new_lines = []
    i = 0
    while i < total:
        line = lines[i]
        if has_substring(line, pat):
            # ``end`` is one past the last line of the run; the bound check
            # keeps a run that reaches the final line from indexing past it.
            end = i + 1
            while end < total and has_substring(lines[end], pat):
                end += 1
            if end - i > 1:
                v = sum(extract_imm_value(lines[z]) for z in range(i, end))
                v %= 0x100000000
                if is_rotate:
                    v %= 32
                if v != 0:
                    new_lines.append(
                        '00000000 %s %s,%s\n'
                        % (op_name, reg_name, hex_without_sign(v))
                    )
                i = end
                continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_simplify_same_op_all(lines):
    """Run ``pass_simplify_same_op`` for a fixed list of (op, register) pairs.

    The pairs are applied one after another, each on the output of the
    previous one, in the order listed below.

    Args:
        lines: List of trace lines.

    Returns:
        The list of lines after all merges.
    """
    schedule = (
        ('add', 'esi'), ('sub', 'esi'),
        ('add', 'edi'), ('sub', 'edi'),
        ('add', 'eax'), ('sub', 'eax'), ('ror', 'eax'),
        ('add', 'ebx'), ('sub', 'ebx'), ('ror', 'ebx'),
        ('add', 'ecx'), ('sub', 'ecx'), ('ror', 'ecx'),
        ('add', 'edx'), ('sub', 'edx'), ('ror', 'edx'),
        ('ror', 'esp'),
    )
    for op_name, reg_name in schedule:
        lines = pass_simplify_same_op(lines, op_name, reg_name)
    return lines
def pass_push_pop_eax_ebx_ecx(lines):
    """Remove ``push eax/ebx/ecx ... pop ecx/ebx/eax`` regions.

    When three consecutive lines contain ``push eax``, ``push ebx`` and
    ``push ecx``, everything from there up to and including the next three
    consecutive lines containing ``pop ecx``, ``pop ebx`` and ``pop eax`` is
    dropped. If no such pop triple follows, the lines are kept unchanged.

    Args:
        lines: List of trace lines.

    Returns:
        A new list of lines without the recognised regions.
    """
    pushes = ['push eax', 'push ebx', 'push ecx']
    pops = ['pop ecx', 'pop ebx', 'pop eax']
    total = len(lines)
    # Last index at which a full pop triple can still start.
    last_pop_start = total - len(pops)
    new_lines = []
    i = 0
    while i < total:
        # Only probe when a whole push triple fits, so the helper never
        # indexes past the end of the trace.
        if i + len(pushes) <= total and has_consecutive_substring(lines, i, pushes):
            j = i + len(pushes)
            while j <= last_pop_start and not has_consecutive_substring(lines, j, pops):
                j += 1
            if j <= last_pop_start:
                # Skip the pushes, the enclosed lines and the pops.
                i = j + len(pops)
                continue
            # No matching pops: fall through and keep the line as is.
        new_lines.append(lines[i])
        i += 1
    return new_lines
def pass_rol_to_ror(lines, reg_name):
    """Rewrite rotates of ``reg_name`` as synthetic ``ror`` lines.

    Checked in this order, for each line:

    * contains ``'rol <reg>,1'``   -> ``ror <reg>,0x1f``
    * contains ``'ror <reg>,1'``   -> ``ror <reg>,0x1``
    * contains ``'rol <reg>,0x'``  -> ``ror <reg>,<32 - (imm % 32)>``

    Any other line is copied unchanged. Note that an immediate that is a
    multiple of 32 yields ``0x20``, not ``0x0``.

    Args:
        lines: List of trace lines.
        reg_name: Register name, e.g. ``'eax'``.

    Returns:
        A new list of lines.
    """
    template = '00000000 %s %s,%s\n'
    rol_by_one = 'rol %s,1' % reg_name
    ror_by_one = 'ror %s,1' % reg_name
    rol_by_imm = 'rol %s,0x' % reg_name
    result = []
    for text in lines:
        if has_substring(text, rol_by_one):
            count = 31
        elif has_substring(text, ror_by_one):
            count = 1
        elif has_substring(text, rol_by_imm):
            count = 32 - (extract_imm_value(text) % 32)
        else:
            result.append(text)
            continue
        result.append(template % ('ror', reg_name, hex_without_sign(count)))
    return result
def pass_rol_to_ror_all(lines):
    """Apply ``pass_rol_to_ror`` for eax, ebx, ecx, edx and esp, in that order.

    Args:
        lines: List of trace lines.

    Returns:
        The list of lines after all five passes.
    """
    for reg_name in ('eax', 'ebx', 'ecx', 'edx', 'esp'):
        lines = pass_rol_to_ror(lines, reg_name)
    return lines
def _reassigned_before_use(lines, start, reg_name):
    """Scan forward from ``lines[start]`` for a ``mov <reg>,`` before any use.

    Each later line is examined in turn; the scan answers False as soon as a
    line appears to reference the register, and True as soon as a line
    containing ``'mov <reg>,'`` is reached without such a reference.
    """
    for idx in range(start, len(lines)):
        later = lines[idx]
        # Register appears as a middle operand (',reg,').
        if has_substring(later, ',%s,' % reg_name):
            return False
        comma = later.find(',')
        if comma == -1:
            # Line without a comma: a mention of the register counts as a
            # use, anything else is skipped.
            if has_substring(later, '%s' % reg_name):
                return False
            continue
        # Register appears at or after the first comma.
        if later.find(reg_name, comma) != -1:
            return False
        # Register appears before the first comma together with a '['.
        if later.find('[', 0, comma) != -1 and later.find(reg_name, 0, comma) != -1:
            return False
        if has_substring(later, 'mov %s,' % reg_name):
            return True
    return False


def pass_unused_before_reassign(lines, reg_name):
    """Drop lines whose result in ``reg_name`` is overwritten before being used.

    A line containing ``' <reg>,'`` is removed when the forward scan over the
    following lines reaches a ``'mov <reg>,'`` line before any line that
    appears to reference the register. All other lines are kept.

    Args:
        lines: List of trace lines.
        reg_name: Register name, e.g. ``'eax'``.

    Returns:
        A new list of lines without the removed ones.
    """
    first_operand = ' %s,' % reg_name
    return [
        text
        for pos, text in enumerate(lines)
        if not (
            has_substring(text, first_operand)
            and _reassigned_before_use(lines, pos + 1, reg_name)
        )
    ]
def pass_unused_before_reassign_all(lines):
    """Apply ``pass_unused_before_reassign`` for ecx, edx, eax, ebx, edi in order.

    Args:
        lines: List of trace lines.

    Returns:
        The list of lines after all five passes.
    """
    for reg_name in ('ecx', 'edx', 'eax', 'ebx', 'edi'):
        lines = pass_unused_before_reassign(lines, reg_name)
    return lines
def pass_ror_to_shift(lines, reg_name):
    """Replace ``ror <reg>,0xS`` + ``and <reg>,0xM`` by a single shift.

    For a rotate count ``S`` in 1..31:

    * if ``M`` equals the low ``32 - S`` bits set, the pair becomes
      ``shr <reg>,S``;
    * if ``M`` equals the high ``S`` bits set, the pair becomes
      ``shl <reg>,32 - S``.

    Any other combination, including counts outside 1..31, is left
    untouched.

    Args:
        lines: List of trace lines.
        reg_name: Register name, e.g. ``'eax'``.

    Returns:
        A new list of lines with the recognised pairs rewritten.
    """
    ror_pat = 'ror %s,0x' % reg_name
    and_pat = 'and %s,0x' % reg_name
    total = len(lines)
    new_lines = []
    i = 0
    while i < total:
        line = lines[i]
        # ``i + 1 < total`` keeps a ror on the final line from indexing
        # past the end of the trace.
        if (
            i + 1 < total
            and has_substring(line, ror_pat)
            and has_substring(lines[i + 1], and_pat)
        ):
            shift = extract_imm_value(line)
            mask = extract_imm_value(lines[i + 1])
            # Counts above 32 would make ``32 - shift`` negative, and 0 or 32
            # would emit a shift by 0x20, which x86 treats as a shift by 0.
            if 0 < shift < 32:
                rmask = (1 << (32 - shift)) - 1
                lmask = 0xFFFFFFFF - rmask
                replacement = None
                if mask == rmask:
                    replacement = '00000000 shr %s,%s\n' % (
                        reg_name, hex_without_sign(shift))
                elif mask == lmask:
                    replacement = '00000000 shl %s,%s\n' % (
                        reg_name, hex_without_sign(32 - shift))
                if replacement is not None:
                    new_lines.append(replacement)
                    i += 2
                    continue
        new_lines.append(line)
        i += 1
    return new_lines
def pass_ror_to_shift_all(lines):
    """Apply ``pass_ror_to_shift`` for eax, ebx, ecx and edx, in that order.

    Args:
        lines: List of trace lines.

    Returns:
        The list of lines after all four passes.
    """
    for reg_name in ('eax', 'ebx', 'ecx', 'edx'):
        lines = pass_ror_to_shift(lines, reg_name)
    return lines
def test():
    """Run the full clean-up pipeline from ``trace.txt`` to ``trace_new.txt``.

    Reads ``trace.txt``, feeds the lines through the passes below in order
    (each pass receives the previous pass's output), and writes the result
    to ``trace_new.txt``.

    Returns:
        None.
    """
    # Order matters: later passes rely on the normalisation done earlier.
    pipeline = (
        pass_nops,
        pass_rol_to_ror_all,
        pass_edi,
        pass_ebp,
        pass_simplify_same_op_all,
        pass_push_pop_eax_ebx_ecx,
        pass_simplify_same_op_all,
        pass_rol_to_ror_all,
        pass_simplify_same_op_all,
        pass_push_pop_eax_ebx_ecx,
        pass_ebp,
        pass_edi,
        pass_simplify_same_op_all,
        pass_simplify_same_op_all,
    )
    # Dead-store elimination is repeated five times, then rotates are
    # converted to shifts once.
    pipeline += (pass_unused_before_reassign_all,) * 5
    pipeline += (pass_ror_to_shift_all,)

    lines = read_file('trace.txt')
    for stage in pipeline:
        lines = stage(lines)
    write_file('trace_new.txt', lines)
    return
# Module-level call: the pipeline runs whenever this file is executed or
# imported (reads 'trace.txt', writes 'trace_new.txt').
test()