-
-
[原创] L3HCTF2024 ez_stack 栈虚拟机+xxtea
-
发表于: 2024-2-24 00:42 3412
-
re ez_stack
老年人复健,看到vm习惯性看了半天汇编搞笑了,没想到byd f5出来就是干净的虚拟机的代码
入手(分析歪了,忘记F5了,这段可以跳过)
疑似分发点
在等待用户输入时暂停程序,找到call输入的位置:402C8E syscall
单步,401D29 处多次被执行,疑似VM分发位置
这里根据var_16F的高4位进行switch
401D30(rva=0x1D30)条件断点,看一下eax的东西:
发现该处被执行非常多次
取字节码函数与vm结构体
找到上述截图中[rbp-0x16F]的被更新的位置(疑似opcode)
401A27 call sub_40170D
401A2C mov [rbp-0x16F], al
sub_40170D是取字节码,该函数非常简单:
BYTE __fastcall f_get_opcode(s1 *a1)
{
__int64 v1; // rax
if ( a1->ui32_pc )
LOBYTE(v1) = a1->ui64_base[--a1->ui32_pc];
else
return sys_exit(-1);
return v1;
}
分析该函数得到VM结构体:
struct s1{
uint64 ui64_base; // +00
uint32 ui32_pc; // +08
uint32 ui32_0C; // +0C
};
base是字节码数组基址,pc是取指计数,sub_40170D每次先减pc,再取base+pc指向的一字节,即倒序取指
第一个vm结构体addr=406040,pc起始未知
读出vm_406040的字节码
import idaapi
s1_addr = 0x406040
base = idaapi.dbg_read_memory(s1_addr, 8)
base = int.from_bytes(base, 'little')
pc = idaapi.dbg_read_memory(s1_addr+8, 4)
pc = int.from_bytes(pc, 'little')
print('base={:X}, pc={:X}'.format(base, pc))
opcode = idaapi.dbg_read_memory(base, 0x30)
print(opcode)
vm字节码解码初探
从4019DE开始,f_get_opcode_40170D被调用了4次,每次都是从vm_406040中取字节码
故vm_406040的字节码的格式应该是(按字节从低到高)
[var_16F_opcode], [var_169], [var_16A], [var_16B]
接着是call sub_4015AE四次,不知道在干嘛
最后走到401D29分发,分发是依据var_16F_opcode高4位
分析0x60对应的0x4020FA
还有一个vm结构体addr=406050,这个字节码似乎是纯数据;即vm结构体中的字节码分为代码类型和数据类型;vm_406040是代码字节码,vm_406050是数据字节码
vm分析
初始化
以上过程都在sub_4017A1这个函数之中
f_init_401139():这个函数完成了字节码的解密,然后调用f_vm_4017A1
void __noreturn f_init_401139()
{
BYTE *src_i; // rsi
BYTE *dest_i; // rdi
signed __int64 len; // rcx
BYTE *dest; // [rsp+10h] [rbp-40h]
dest = (BYTE *)sys_mmap(0LL, &loc_401529 - (_UNKNOWN *)qword_401180, 3uLL, 0x22uLL, 0xFFFFFFFFFFFFFFFFLL, 0LL);
src_i = (BYTE *)qword_401180;
dest_i = dest;
len = &loc_401529 - (_UNKNOWN *)qword_401180;
do
*dest_i++ = len-- ^ *src_i++; // 解密字节码
while ( len );
f_vm_4017A1(dest); // 开始运行虚拟机
}
加密的字节码:分析f_init_401139,加密的字节码存在401180的位置
在准备call f_vm_4017A1时,dump解密的字节码
基于栈的虚拟机
分析f_vm_4017A1,字节码解析,虚拟机指令执行都在这里
一共有三个栈,栈控制结构位于406040、406050、406060
栈以字节为单位,栈控制结构如下(即前面的VM结构体):
struct s1{
unsigned char* base; // 栈基址
unsigned int num; // 栈中字节数; 同时也是sp, 指向空位
unsigned int max; // 栈容量
};
sub_4015AE是push操作
sub_40170D是pop操作
406040这个栈存储待执行的指令字节码,相当于IR寄存器
406050这个栈存储运行中的数据,相当于x86&C的栈上变量
406060这个栈用于运行时暂存数据,相当于x86&C的通用寄存器;以及暂存执行过的指令,用于实现循环功能
指令字节码格式
解密出来的字节码混合了数据和指令,指令是压缩存储的
指令字节码是定长的4四字节,从解密字节码中转换成4字节指令字节码,并压入406040的栈:
v6 = *pdata;
oprand_num = v6 & 3;
for ( i = 0; i <= oprand_num; ++i )
{
v1 = pdata++;
f_push(&vm_406040, *v1);
}
for ( j = 3; j > oprand_num; --j )
f_push(&vm_406040, 0);
从406040栈中取指令,此处会将取出的指令push到vm_406060,用于循环:
op_big[0] = f_pop(&vm_406040);
op_big[1] = f_pop(&vm_406040);
op_big[2] = f_pop(&vm_406040);
op0 = f_pop(&vm_406040);
f_push(&vm_406060, op_big[0]);
f_push(&vm_406060, op_big[1]);
f_push(&vm_406060, op_big[2]);
f_push(&vm_406060, op0);
指令的格式,直接贴解析指令的代码:
x = op0 & 3
y = op0 & 0xF0
if x == 0:
z = 1
elif x == 1:
z = op_big[2]
elif x == 2:
z = op_big[1] | (op_big[2] << 8)
elif x == 3:
z = (op_big[1] <= 7) | (op_big[2] << 16) | op_big[0]
op0是指令信息,op_big是指令原始操作数
x是原始操作数的个数(x+1就是指令压缩的长度,后出先进,op0就是解密后解压前指令的第一个字节)
y是操作码
z是解密的操作数,根据原始操作数解密得出(原始操作数可能未加密,此时z不被使用)
虚拟机指令分析
按照y分发
IO指令
0xE0是从vm406050依次pop z个字节,并sys_write()出来
0xD0是读z个字节,然后依次push进vm406050
栈上运算指令
0xC0:位移运算,详见后面分析
0xB0:是or运算,类似0x80的sub运算
0xA0:是and运算,类似0x80的sub运算
0x90:是xor运算,类似0x80的sub运算,xor的操作数有可能是32字节
0x80:是sub运算,类似手算竖式减法那样从低字节往高字节减
0x70:是add运算,类似0x80的sub运算
0x80的sub指令分析:
z是减数和被减数长度,栈中低位数-高位数,结果存回栈(都是小端序)
carry = 0;
v_zf = 1;
for ( byte_idx = z; byte_idx > 0; --byte_idx )
{ // byte_idx是当前被减的字节(从1开始,从高位开始计数)
for ( i10 = 0; i10 < byte_idx - 1; ++i10 )
{ // 暂存高位字节
temp = f_pop(&vm_406050);
f_push(&vm_406060, temp);
}
top0 = f_pop(&vm_406050); // 取除减数字节
for ( i11 = 0; i11 < byte_idx - 1; ++i11 )
{ // 暂存高位字节
temp = f_pop(&vm_406050);
f_push(&vm_406060, temp);
}
top1 = f_pop(&vm_406050); // 取被减数字节
v53 = top1 - top0 - carry;
top0 = top1 - top0 - carry;
carry = -(v53 >> 8);
if ( top0 )
v_zf = 0;
f_push(&vm_406050, top0);
for ( i12 = 0; i12 < 2 * (byte_idx - 1); ++i12 )
{ // 回填暂存的字节
temp_1 = f_pop(&vm_406060);
f_push(&vm_406050, temp_1);
}
}
移动数据指令
0x60:复制vm406050中的倒数第z个(从0开始计数)字节,push到vm406050
0x50:删除vm406050中的倒数第z个(从0开始计数)字节,类似0x60
0x40:将有效的原始操作数push到vm406050
控制流指令
0x30:向后跳转,类似break,z是跳过的指令(不包括本指令)
if ( jmpnum ) // 实现向后跳转
{
--jmpnum;
op0 = 0;
}
// ...
y = op0 & 0xF0;
// ...
if ( y == 0x30 ) // jmp
{
jmpnum = z;
v_zf = 0;
}
0x20:从406060中取出之前push进去的旧指令,实现向前跳转,循环的功能
取指指令
0x10是从解密字节码中解压z条指令,并压入406040的栈
分支指令
op0 & 0xF == 8
为分支指令,调试发现只有y == 0x38这一种分支指令
zf=0(!=0)就不向后跳转(不break)
zf=1(==0)就向后跳转(break)
if ( (op0 & 8) != 0 && !v_zf ) // op0&8, 就测试zf, zf为0就不执行该指令
{
v_zf = 0;
op0 = 0;
}
y = op0 & 0xF0;
虚拟机0xC0位移指令分析
对栈顶的元素进行位移操作
op_big[0]是位移位数
op_big[1]是元素字节数
op_big[2]是位移的类型
00b是左移1位
01b是右移1位
10b是循环左移1位
11b是循环右移1位
整理if y == 0xC0
处的代码如下:
// y == 0xC0, op_big[2] == 00
void handle_C0_00(BYTE shift_bits, BYTE elem_size) {
for ( int n = 0; n < shift_bits; ++n ) { // for each group
// stack50 => stack60
for ( int i = 0; i < elem_size; ++i ) {
unsigned char top = f_pop(&vm_406050);
f_push(&vm_406060, top);
}
// stack60 =(high-1 to low)=> stack50
unsigned char last_hi_1 = 0;
for ( int i = 0; i < elem_size; ++i ) {
unsigned char top = f_pop(&vm_406060);
unsigned char hi_1 = top >> 7;
top = (top << 1) | last_hi_1;
last_hi_1 = hi_1;
f_push(&vm_406050, top);
}
} // end of each group
}
// y == 0xC0, op_big[2] == 01
void handle_C0_01(BYTE shift_bits, BYTE elem_size) {
for ( int n = 0; n < shift_bits; ++n ) { // for each group
// stack50 =(low-1 to high)> stack60
unsigned char last_lo_1 = 0;
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406050);
unsigned char lo_1 = top << 7;
top = last_lo_1 | (top >> 1);
last_lo_1 = lo_1;
f_push(&vm_406060, top);
}
// stack60 => stack50
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406060);
f_push(&vm_406050, top);
}
} // end of each group
}
// y == 0xC0, op_big[2] == 10
void handle_C0_10(BYTE shift_bits, BYTE elem_size) {
for ( int n = 0; n < shift_bits; ++n ) { // for each group
// circle
unsigned char last_hi_1 = 0;
unsigned char top = f_pop(&vm_406050); // this is stack60 stack back
last_hi_1 = top >> 7;
f_push(&vm_406050, top);
// stack50 => stakc60
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406050);
f_push(&vm_406060, top);
}
// stack60 =(high-1 to low)> stack50
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406060);
unsigned char hi_1 = top >> 7;
top = (top << 1) | last_hi_1;
last_hi_1 = hi_1;
f_push(&vm_406050, top);
}
} // end of each group
}
// y == 0xC0, op_big[2] == 11
void handle_C0_11(BYTE shift_bits, BYTE elem_size) {
for ( int n = 0; n < shift_bits; ++n ) { // for each group
// stack50 =(low-1 to high)> stack60
unsigned char last_lo_1 = 0;
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406050);
unsigned char lo_1 = top << 7;
top = last_lo_1 | (top >> 1);
last_lo_1 = lo_1;
f_push(&vm_406060, top);
}
// stack60 => stack50
for ( int i = 0; i < elem_size; ++i )
{
unsigned char top = f_pop(&vm_406060);
f_push(&vm_406050, top);
}
// cricle
unsigned char top = f_pop(&vm_406050);
top |= last_lo_1;
f_push(&vm_406050, top);
} // end of each group
}
还原结果
vm虚拟机反汇编结果
见附件
反编译结果(手工)
根据上面的汇编反编译,这里加入了栈指针,方便手工处理
这题没有混淆,要是能传播变量基本就很易读了。但有循环gcc a.c -Os -o a
优化不了,有空再研究存在分支的情况下怎么优化,主要要弄个phi函数处理分支回合点有点麻烦,之前vmp还原也是卡在这里,只做了基本块内的优化
直接贴结果,每一段vm虚拟机汇编下面对应反编译的c代码
第二个while1是我自己加的,那段代码有明显规律,其实就一个for 8的展开
...... ......
解密
看还原结果,前面说到第二个while(1)是我看到有规律为了方便自己加上去的循环
实际上这个看到第一段熟悉tea的就能猜到是变形xxtea,后面xor实际就是32字节的相等判断,这题就结束了
不了解tea的其实那8段代码也很有规律,硬看其实也花不了多少时间
优化一下,题目就是函数encrypto,解密函数是decrypto:
#include<stdio.h>
#include<memory.h>
#define MZ ((y>>5) ^ (z<<3) ^ ((y^sum)+key[i]))
const unsigned int DELTA = 0x9e3779b9;
void encrypto(unsigned int *m, unsigned int *c, unsigned int *key, int n, int rounds) {
memcpy(c, m, 4*n);
unsigned int sum = 0;
for (int round = 0; round < rounds; round++) {
sum += DELTA;
for(int i = 0; i < n; i++){
// unsigned int x = c[i];
unsigned int y = c[(i+1)%n];
unsigned int z = c[(i-1+n)%n];
c[i] += MZ;
}
}
}
void decrypto(unsigned int *m, unsigned int *c, unsigned int *key, int n, int rounds) {
memcpy(m, c, 4*n);
unsigned int sum = DELTA*rounds;
for (int round = 0; round < rounds; round++) {
for(int i = n - 1; i >= 0; i--){
// unsigned int x = m[i];
unsigned int y = m[(i+1)%n];
unsigned int z = m[(i-1+n)%n];
m[i] -= MZ;
}
sum -= DELTA;
}
}
int main() {
unsigned char key[32] = {105, 163, 127, 84, 167, 81, 175, 67, 199, 102, 143, 238, 213, 254, 91, 38, 150, 131, 19, 218, 181, 52, 132, 233, 119, 81, 166, 132, 75, 8, 106, 75};
unsigned char m[32] = {0,};
unsigned char c[32] = {62, 85, 188, 129, 9, 113, 186, 116, 152, 129, 71, 189, 230, 61, 86, 69, 97, 198, 232, 98, 218, 121, 11, 208, 206, 211, 47, 177, 3, 2, 22, 245};
int n = 8;
int rounds = 32;
decrypto((unsigned int *)m, (unsigned int *)c, (unsigned int *)key, n, rounds);
printf("m = ");
for (int i = 0; i < 32; i++)
printf("%c", m[i]);
printf("\n");
encrypto((unsigned int *)m, (unsigned int *)c, (unsigned int *)key, n, rounds);
printf("c = ");
for (int i = 0; i < 32; i++)
printf("%d, ", c[i]);
printf("\n\n");
return 0;
}