If you don't know what Code Virtualizer is or how it works, you should read this first:
http://rapidshare.com/files/16968098/Inside_Code_Virtualizer.rar
(Inside Code Virtualizer by scherzo)
Now, as you probably already know from the paper by scherzo, one possible way to recover virtualized code is to identify each mutated handler (i.e. find the corresponding non-mutated version). Once this is done, we can trace the virtual opcodes and "decompile" them to VM instructions. Having "clean" decompiled output, we can translate it to x86 assembly. I consider the last step to be a simple "find and replace" job with flex/yacc.
The problem is, Oreans' VM engine can be a bitch. Consider this piece of code:
Code:
push ebx
mov ebx 0F06h
inc ebx
shr ebx 15h
push ecx
mov ecx 6156h
xor ebx ecx
pop ecx
add ebx 4114h
shl ebx 7
push ecx
mov ecx 51351Ch
xor ebx ecx
pop ecx
add ebx edi
mov eax [ebx]
pop ebx
push 67E0h
mov [esp] edx
mov edx 1
and eax edx
mov edx [esp]
push edx
mov edx esp
add edx 4
add edx 4
xchg edx [esp]
pop esp
or eax eax
push eax
mov eax 3B02h
not eax
push ecx
mov ecx 0FFFFC3FFh
sub eax ecx
pop ecx
and [edi+1Ch] eax
mov eax [esp]
add esp 4
push 3328h
mov [esp] ebx
mov ebx [esp]
push ecx
mov ecx esp
add ecx 4
add ecx 4
xchg ecx [esp]
pop esp
Can you tell what it does? Me neither, so let's try to deobfuscate this crap.
It turns out that simple strategies like constant folding (http://en.wikipedia.org/wiki/Constant_folding), dead code elimination (http://en.wikipedia.org/wiki/Dead_code) and peephole optimisation (find and replace :P), plus some stack cleaning, suffice to recover the obfuscated code:
Code:
NEW = 47, OLD = 0, -47 lines removed
################################ NEXT ROUND ###########################
################################ original
push ebx
mov ebx 00000f06
inc ebx
shr ebx 00000015
push ecx
mov ecx 00006156
xor ebx ecx
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
mov ecx 0051351c
xor ebx ecx
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
mov edx 00000001
and eax edx
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax 00003b02
not eax
push ecx
mov ecx ffffc3ff
sub eax ecx
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after constant propagation and folding
push ebx
mov ebx 00000000
push ecx
mov ecx 00006156
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
mov ecx 0051351c
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
mov edx 00000001
and eax 00000001
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax ffffc4fd
push ecx
mov ecx ffffc3ff
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after dead code elimination
push ebx
mov ebx 00000000
push ecx
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push 000067e0
mov [esp ] edx
and eax 00000001
mov edx [esp ]
push edx
mov edx esp
add edx 00000004
add edx 00000004
xchg edx [esp ]
pop esp
or eax eax
push eax
mov eax ffffc4fd
push ecx
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
mov eax [esp ]
add esp 00000004
push 00003328
mov [esp ] ebx
mov ebx [esp ]
push ecx
mov ecx esp
add ecx 00000004
add ecx 00000004
xchg ecx [esp ]
pop esp
################################ after peephole optimisation
push ebx
mov ebx 00000000
push ecx
xor ebx 00006156
pop ecx
add ebx 00004114
shl ebx 00000007
push ecx
xor ebx 0051351c
pop ecx
add ebx edi
mov eax [ebx ]
pop ebx
push edx
and eax 00000001
pop edx
or eax eax
push eax
mov eax ffffc4fd
push ecx
sub eax ffffc3ff
pop ecx
and [edi 0000001c ] eax
pop eax
push ebx
pop ebx
################################ after stack cleaning
mov ebx 00000000
xor ebx 00006156
add ebx 00004114
shl ebx 00000007
xor ebx 0051351c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax ffffc4fd
sub eax ffffc3ff
and [edi 0000001c ] eax
NEW = 11, OLD = 47, 36 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 00000000
xor ebx 00006156
add ebx 00004114
shl ebx 00000007
xor ebx 0051351c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax ffffc4fd
sub eax ffffc3ff
and [edi 0000001c ] eax
################################ after constant propagation and folding
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
mov eax 000000fe
and [edi 0000001c ] 000000fe
################################ after dead code elimination
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after peephole optimisation
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after stack cleaning
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
NEW = 5, OLD = 11, 6 lines removed
################################ NEXT ROUND ###########################
################################ original
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after constant propagation and folding
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after dead code elimination
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after peephole optimisation
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
################################ after stack cleaning
mov ebx 0000001c
add ebx edi
mov eax [ebx ]
and eax 00000001
or eax eax
and [edi 0000001c ] 000000fe
Well, almost. The trash above is the verbose output of my little "cleaner" tool. The cleaner is usable; it gives nice results for most of the included code samples. In the handlers.clean folder (see the link at the bottom) there are non-mutated versions of the CV handlers. After deobfuscation, a few heuristics can be applied to match the deobfuscated and clean versions: edit distance, or rare-instruction matching (for example, rol, ror and rcr are rare and show up only in one handler).
The problem is, I got bored with all of this, so if anyone would like to help, I will be more than happy.
Here is the code:
http://www.orange-bat.com/oreans.rar
Compile with make; it will work without problems under Cygwin, and it should work under Linux. To use rip_handlers.py you will need IDAPython.