[原创]安卓逆向的核心流程里面分享了安卓逆向技能树的“树干”。“树干”上有多个“树枝”,其中有一个就是“混淆还原”,用来还原复杂难懂的代码。然后这个“树枝”上又有多个“树叶”,接下来分享的就是里面的“扁平化还原”。
大多OLLVM扁平化还原的核心思路是基于:块关系。
这篇文章从另外一个角度进行还原:状态机。
样本来自:[原创]OLLVM控制流平坦化混淆还原,目标函数:.init_proc
先说一下对OLLVM扁平化的理解
正常一个函数,会有多个基本块,逻辑清晰
函数通过OLLVM扁平化之后,逻辑变得复杂
所谓的扁平化,本质上是用状态机的方式对所有基本块进行重新调度。
原本直观的层次关系被抹除,所有基本块扁平成一排,从而提高静态分析的成本。
更形象地理解:一本书本来是章节结构清晰的,扁平化后每一段后面写着下一段的编号,要去找到这个编号才能串起来。
这里先梳理一下状态机的调度过程,后续还原要用到:

因为改变状态之后,都会回到分发器:goto dispatcher,这就意味着dispatcher引用有很多个
这个函数只有一个集中返回的点,所以这里简单粗暴地认定:被引用次数最多的地址就是dispatcher。
对应可以写一个找dispatcher的脚本
0x43120对应有29个引用

状态寄存器存放状态值,分发器根据状态值进行调度,举个例子:
对应可以写一个找状态寄存器的脚本:
调度过程都是:改变状态+返回分发器
返回分发器直接找:B dispatcher_ea
改变状态有2种情况:
对应的可以写一个寻找改变状态块的脚本
先比较状态,再执行基本块,具体有2种情况:
对应的可以写一个寻找命中状态块的脚本
OLLVM扁平化的还原,实际上是去掉分发器,从而还原基本块的关系
上面说到基本块的结束有2种情况,所以还原扁平化的脚本要处理2种情况:
还原之前:逻辑复杂,总共181行

还原之后:逻辑清晰,总共53行,是原来的29%

基于状态机的角度进行还原,关键的步骤如下:
实战过程遇到的情况会复杂很多,比如:
1、同个函数里面存在多个分发器;
2、“存放状态的寄存器”和“比较状态的寄存器”不是同一个;
3、比较状态不是直接判断值是否相等,而是用等价的表达式。
如果代码写得好,一键还原还挺爽的。
PS:块关系的角度,是先对所有基本块进行分类,再找出真实块之间的关联。但真实块之间的关联需要执行才知道(真机trace、模拟运行或者符号执行)。
/* Minimal example before flattening: one entry block that tests x and two
 * branch blocks that each return a constant. */
int demo(int x) {
    if (x > 0) { // entry block
        return 1; // true-branch block
    } else {
        return -1; // false-branch block
    }
}
/* Minimal example before flattening: one entry block that tests x and two
 * branch blocks that each return a constant. */
int demo(int x) {
    if (x > 0) { // entry block
        return 1; // true-branch block
    } else {
        return -1; // false-branch block
    }
}
/* Flattened form of demo(): every original block becomes a switch case, and
 * a state variable selects which case runs next. Each case stores the next
 * state and jumps back to the dispatcher label.
 * NOTE: 0xINIT / 0xS_TRUE / 0xS_FALSE / 0xS_RET are placeholder names standing
 * in for state constants; they are not valid C hex literals. */
int demo_flattened(int x) {
    uint32_t state = 0xINIT; // initial state constant
    int ret = 0;
dispatcher: // dispatcher
    switch (state) {
    case 0xINIT: {
        // corresponds to the original entry block + condition check
        if (x > 0) {
            state = 0xS_TRUE; // state constant for the true branch
        } else {
            state = 0xS_FALSE; // state constant for the false branch
        }
        goto dispatcher;
    }
    case 0xS_TRUE: {
        // corresponds to the original true branch: return 1;
        ret = 1;
        state = 0xS_RET; // move to the "return" state
        goto dispatcher;
    }
    case 0xS_FALSE: {
        // corresponds to the original false branch: return -1;
        ret = -1;
        state = 0xS_RET; // move to the "return" state
        goto dispatcher;
    }
    case 0xS_RET: {
        return ret;
    }
    }
}
/* Flattened form of demo(): every original block becomes a switch case, and
 * a state variable selects which case runs next. Each case stores the next
 * state and jumps back to the dispatcher label.
 * NOTE: 0xINIT / 0xS_TRUE / 0xS_FALSE / 0xS_RET are placeholder names standing
 * in for state constants; they are not valid C hex literals. */
int demo_flattened(int x) {
    uint32_t state = 0xINIT; // initial state constant
    int ret = 0;
dispatcher: // dispatcher
    switch (state) {
    case 0xINIT: {
        // corresponds to the original entry block + condition check
        if (x > 0) {
            state = 0xS_TRUE; // state constant for the true branch
        } else {
            state = 0xS_FALSE; // state constant for the false branch
        }
        goto dispatcher;
    }
    case 0xS_TRUE: {
        // corresponds to the original true branch: return 1;
        ret = 1;
        state = 0xS_RET; // move to the "return" state
        goto dispatcher;
    }
    case 0xS_FALSE: {
        // corresponds to the original false branch: return -1;
        ret = -1;
        state = 0xS_RET; // move to the "return" state
        goto dispatcher;
    }
    case 0xS_RET: {
        return ret;
    }
    }
}
state = STATE_INIT // 1、初始化状态
dispatcher: // 分发器
if (state == STATE_XXX){ // 2、比较状态
do something; // 3、命中状态
state = STATE_YYY; // 4、改变状态
goto dispatcher; // 5、返回分发器
} else if (state == STATE_YYY){
...
}
state = STATE_INIT // 1、初始化状态
dispatcher: // 分发器
if (state == STATE_XXX){ // 2、比较状态
do something; // 3、命中状态
state = STATE_YYY; // 4、改变状态
goto dispatcher; // 5、返回分发器
} else if (state == STATE_YYY){
...
}
def find_dispatcher_ea(start_ea, end_ea):
    """Guess the dispatcher address of a flattened function.

    Walks the instructions in ``[start_ea, end_ea)`` and counts, per target
    address, how many unconditional ``B`` instructions jump there. The most
    frequently targeted address is taken to be the dispatcher.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).

    Returns:
        The address targeted by the most ``B`` instructions, or ``None`` when
        no ``B`` instruction with a valid target is found. On a tie, the
        target that was encountered first wins.
    """
    counts = {}
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        # Only plain "B" is counted; conditional branches (e.g. "B.NE") have
        # a different mnemonic and are ignored on purpose.
        if ida_ua.print_insn_mnem(ea) == "B":
            target = idc.get_operand_value(ea, 0)
            if target != idc.BADADDR:
                counts[target] = counts.get(target, 0) + 1
        ea += size
    if not counts:
        return None
    return max(counts, key=counts.get)
# Locate the dispatcher and report its address.
dispatcher_ea = find_dispatcher_ea(start_ea, end_ea)
if dispatcher_ea is None:
    # find_dispatcher_ea returns None when it counted no branch targets;
    # "{:X}".format(None) would raise TypeError, so report that case instead.
    print("分发器地址: 未找到")
else:
    print("分发器地址: dispatcher_ea=0x{:X}".format(dispatcher_ea))
分发器地址: dispatcher_ea=0x43120
def find_dispatcher_ea(start_ea, end_ea):
    """Guess the dispatcher address of a flattened function.

    Walks the instructions in ``[start_ea, end_ea)`` and counts, per target
    address, how many unconditional ``B`` instructions jump there. The most
    frequently targeted address is taken to be the dispatcher.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).

    Returns:
        The address targeted by the most ``B`` instructions, or ``None`` when
        no ``B`` instruction with a valid target is found. On a tie, the
        target that was encountered first wins.
    """
    counts = {}
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        # Only plain "B" is counted; conditional branches (e.g. "B.NE") have
        # a different mnemonic and are ignored on purpose.
        if ida_ua.print_insn_mnem(ea) == "B":
            target = idc.get_operand_value(ea, 0)
            if target != idc.BADADDR:
                counts[target] = counts.get(target, 0) + 1
        ea += size
    if not counts:
        return None
    return max(counts, key=counts.get)
# Locate the dispatcher and report its address.
dispatcher_ea = find_dispatcher_ea(start_ea, end_ea)
if dispatcher_ea is None:
    # find_dispatcher_ea returns None when it counted no branch targets;
    # "{:X}".format(None) would raise TypeError, so report that case instead.
    print("分发器地址: 未找到")
else:
    print("分发器地址: dispatcher_ea=0x{:X}".format(dispatcher_ea))
分发器地址: dispatcher_ea=0x43120
MOV W9, #STATE_XXX
CMP W8, W9
B.NE loc_43120 // dispatcher_ea
等价于
if (state_reg != STATE_XXX) goto dispatcher;
在这里“状态寄存器”state_reg就是CMP W8, W9里面的W8
MOV W9, #STATE_XXX
CMP W8, W9
B.NE loc_43120 // dispatcher_ea
等价于
if (state_reg != STATE_XXX) goto dispatcher;
在这里“状态寄存器”state_reg就是CMP W8, W9里面的W8
def find_state_reg(start_ea, end_ea, dispatcher_ea):
    """Find the register that holds the state value.

    Scans ``[start_ea, end_ea)`` for a ``B.NE`` whose target is
    ``dispatcher_ea`` and that is directly preceded by a ``CMP``. The first
    operand of that ``CMP`` is reported as the state register.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).
        dispatcher_ea: Address of the dispatcher the ``B.NE`` must target.

    Returns:
        The register name, stripped and upper-cased (e.g. ``"W8"``), or
        ``None`` when no ``CMP`` + ``B.NE dispatcher_ea`` pair is found.
    """
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        if (ida_ua.print_insn_mnem(ea) == "B.NE"
                and idc.get_operand_value(ea, 0) == dispatcher_ea):
            prev_ea = idc.prev_head(ea)
            # Only trust operand 0 when the preceding instruction lies inside
            # the scanned range (this also rejects BADADDR) and really is the
            # comparison; otherwise keep scanning for a better match.
            if (start_ea <= prev_ea < ea
                    and ida_ua.print_insn_mnem(prev_ea) == "CMP"):
                return idc.print_operand(prev_ea, 0).strip().upper()
        ea += size
    return None
# Determine which register holds the state value and report it.
# find_state_reg returns None when it finds no match, which prints as "None".
state_reg = find_state_reg(start_ea, end_ea, dispatcher_ea)
print("状态寄存器: state_reg={}".format(state_reg))
状态寄存器: state_reg=W8
def find_state_reg(start_ea, end_ea, dispatcher_ea):
    """Find the register that holds the state value.

    Scans ``[start_ea, end_ea)`` for a ``B.NE`` whose target is
    ``dispatcher_ea`` and that is directly preceded by a ``CMP``. The first
    operand of that ``CMP`` is reported as the state register.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).
        dispatcher_ea: Address of the dispatcher the ``B.NE`` must target.

    Returns:
        The register name, stripped and upper-cased (e.g. ``"W8"``), or
        ``None`` when no ``CMP`` + ``B.NE dispatcher_ea`` pair is found.
    """
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        if (ida_ua.print_insn_mnem(ea) == "B.NE"
                and idc.get_operand_value(ea, 0) == dispatcher_ea):
            prev_ea = idc.prev_head(ea)
            # Only trust operand 0 when the preceding instruction lies inside
            # the scanned range (this also rejects BADADDR) and really is the
            # comparison; otherwise keep scanning for a better match.
            if (start_ea <= prev_ea < ea
                    and ida_ua.print_insn_mnem(prev_ea) == "CMP"):
                return idc.print_operand(prev_ea, 0).strip().upper()
        ea += size
    return None
# Determine which register holds the state value and report it.
# find_state_reg returns None when it finds no match, which prints as "None".
state_reg = find_state_reg(start_ea, end_ea, dispatcher_ea)
print("状态寄存器: state_reg={}".format(state_reg))
状态寄存器: state_reg=W8
第1种:直接修改
MOV state_reg, next_state
B dispatcher_ea
第2种:分支修改
MOV true_reg, true_state
MOV false_reg, false_state
CSEL state_reg, true_reg, false_reg, cond
B dispatcher_ea
相当于:
state_reg = cond ? true_state: false_state
B dispatcher_ea
第1种:直接修改
MOV state_reg, next_state
B dispatcher_ea
第2种:分支修改
MOV true_reg, true_state
MOV false_reg, false_state
CSEL state_reg, true_reg, false_reg, cond
B dispatcher_ea
相当于:
state_reg = cond ? true_state: false_state
B dispatcher_ea
def find_block_ea_2_next_state_list(start_ea, end_ea, dispatcher_ea, state_reg):
    """Collect the blocks that update the state and jump back to the dispatcher.

    Scans ``[start_ea, end_ea)`` for ``B dispatcher_ea`` instructions and
    inspects the instruction immediately before each one:

    * ``MOV``/``MOVK`` into ``state_reg``: a direct state change.
    * ``CSEL`` into ``state_reg``: a conditional state change, where operand 1
      is the register selected when the condition holds and operand 2 the
      register selected otherwise.

    Branches preceded by anything else are skipped.

    The immediate values are resolved by ``find_prev_mov_imm``, which is
    defined elsewhere in this file.
    NOTE(review): presumably it scans backwards for the MOV/MOVK pair that
    loads the given register and may return None when nothing is found;
    confirm against its definition.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).
        dispatcher_ea: Address of the dispatcher the ``B`` must target.
        state_reg: Upper-case name of the state register, e.g. ``"W8"``.

    Returns:
        A list of dicts in address order. Each dict has ``"block_ea"`` (the
        address of the ``B`` instruction) and either ``"next_state"`` or both
        ``"true_state"`` and ``"false_state"``.
    """
    results = []
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        if (ida_ua.print_insn_mnem(ea) == "B"
                and idc.get_operand_value(ea, 0) == dispatcher_ea):
            block_ea = ea
            prev_ea = idc.prev_head(block_ea)
            # Ignore a "previous" instruction outside the scanned range;
            # the upper bound also rejects BADADDR.
            if start_ea <= prev_ea < block_ea:
                prev_mnem = ida_ua.print_insn_mnem(prev_ea)
                dest_reg = idc.print_operand(prev_ea, 0).strip().upper()
                if dest_reg == state_reg and prev_mnem in ("MOV", "MOVK"):
                    # Direct change: the search starts at the branch itself.
                    next_state = find_prev_mov_imm(block_ea, state_reg, start_ea)
                    results.append({
                        "block_ea": block_ea,
                        "next_state": next_state,
                    })
                elif dest_reg == state_reg and prev_mnem == "CSEL":
                    # Conditional change: resolve both candidate registers,
                    # searching from the CSEL.
                    true_reg = idc.print_operand(prev_ea, 1).strip().upper()
                    false_reg = idc.print_operand(prev_ea, 2).strip().upper()
                    true_state = find_prev_mov_imm(prev_ea, true_reg, start_ea)
                    false_state = find_prev_mov_imm(prev_ea, false_reg, start_ea)
                    results.append({
                        "block_ea": block_ea,
                        "true_state": true_state,
                        "false_state": false_state,
                    })
        ea += size
    return results
# Collect every "change state + return to dispatcher" block and print it.
block_ea_2_next_state_list = find_block_ea_2_next_state_list(start_ea, end_ea, dispatcher_ea, state_reg)
for block_ea_2_next_state in block_ea_2_next_state_list:
    if "next_state" in block_ea_2_next_state:
        # A missing value (None) is printed as 0, matching the branch case
        # below; formatting None with {:X} would raise TypeError.
        print("block_ea=0x{:X} -> next_state=0x{:X}".format(
            block_ea_2_next_state["block_ea"],
            block_ea_2_next_state["next_state"] if block_ea_2_next_state["next_state"] is not None else 0))
    else:
        print("block_ea=0x{:X} -> true_state=0x{:X}, false_state=0x{:X}".format(
            block_ea_2_next_state["block_ea"],
            block_ea_2_next_state["true_state"] if block_ea_2_next_state["true_state"] is not None else 0,
            block_ea_2_next_state["false_state"] if block_ea_2_next_state["false_state"] is not None else 0))
block_ea=0x430DC -> next_state=0x665797A5
block_ea=0x43194 -> true_state=0x4E30550D, false_state=0xBEE4A4C9
block_ea=0x431EC -> true_state=0xA9D4543B, false_state=0xC7AC1F5F
block_ea=0x43244 -> next_state=0xEC74B33D
block_ea=0x43284 -> next_state=0x5338AB80
block_ea=0x432BC -> next_state=0x146E0C87
block_ea=0x43324 -> next_state=0x3455F111
block_ea=0x43360 -> next_state=0x37117E76
block_ea=0x433C4 -> next_state=0xBC34C1D0
block_ea=0x433D4 -> next_state=0xBEE4A4C9
block_ea=0x433F4 -> true_state=0xF5C370CA, false_state=0x667521E4
block_ea=0x43408 -> next_state=0x89EFF5EA
block_ea=0x4341C -> next_state=0x7986A6FB
block_ea=0x43434 -> true_state=0xE4DBC33F, false_state=0x667521E4
block_ea=0x43444 -> next_state=0xF5C370CA
block_ea=0x43454 -> next_state=0x9FAB5B41
block_ea=0x43470 -> next_state=0xC80CBB41
block_ea=0x4348C -> next_state=0xC7AC1F5F
block_ea=0x434A8 -> next_state=0x39649A15
block_ea=0x434C8 -> true_state=0xBD9FBBA, false_state=0x5338AB80
block_ea=0x434E8 -> true_state=0x146E0C87, false_state=0x1B166FED
block_ea=0x43194 -> true_state=0x4E30550D, false_state=0xBEE4A4C9
对应的汇编:
0x4317C MOV W8, #0xA4C9
0x43180 MOV W9, #0x550D
0x43184 CMP X0,
0x43188 MOVK W8, #0xBEE4,LSL#16
0x4318C MOVK W9, #0x4E30,LSL#16
0x43190 CSEL W8, W9, W8, EQ
0x43194 B loc_43120
def find_block_ea_2_next_state_list(start_ea, end_ea, dispatcher_ea, state_reg):
    """Collect the blocks that update the state and jump back to the dispatcher.

    Scans ``[start_ea, end_ea)`` for ``B dispatcher_ea`` instructions and
    inspects the instruction immediately before each one:

    * ``MOV``/``MOVK`` into ``state_reg``: a direct state change.
    * ``CSEL`` into ``state_reg``: a conditional state change, where operand 1
      is the register selected when the condition holds and operand 2 the
      register selected otherwise.

    Branches preceded by anything else are skipped.

    The immediate values are resolved by ``find_prev_mov_imm``, which is
    defined elsewhere in this file.
    NOTE(review): presumably it scans backwards for the MOV/MOVK pair that
    loads the given register and may return None when nothing is found;
    confirm against its definition.

    Args:
        start_ea: First address to scan (inclusive).
        end_ea: Address at which scanning stops (exclusive).
        dispatcher_ea: Address of the dispatcher the ``B`` must target.
        state_reg: Upper-case name of the state register, e.g. ``"W8"``.

    Returns:
        A list of dicts in address order. Each dict has ``"block_ea"`` (the
        address of the ``B`` instruction) and either ``"next_state"`` or both
        ``"true_state"`` and ``"false_state"``.
    """
    results = []
    ea = start_ea
    while ea < end_ea:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, ea)
        if size <= 0:
            # Nothing decodable at this address: step one byte and retry.
            ea += 1
            continue
        if (ida_ua.print_insn_mnem(ea) == "B"
                and idc.get_operand_value(ea, 0) == dispatcher_ea):
            block_ea = ea
            prev_ea = idc.prev_head(block_ea)
            # Ignore a "previous" instruction outside the scanned range;
            # the upper bound also rejects BADADDR.
            if start_ea <= prev_ea < block_ea:
                prev_mnem = ida_ua.print_insn_mnem(prev_ea)
                dest_reg = idc.print_operand(prev_ea, 0).strip().upper()
                if dest_reg == state_reg and prev_mnem in ("MOV", "MOVK"):
                    # Direct change: the search starts at the branch itself.
                    next_state = find_prev_mov_imm(block_ea, state_reg, start_ea)
                    results.append({
                        "block_ea": block_ea,
                        "next_state": next_state,
                    })
                elif dest_reg == state_reg and prev_mnem == "CSEL":
                    # Conditional change: resolve both candidate registers,
                    # searching from the CSEL.
                    true_reg = idc.print_operand(prev_ea, 1).strip().upper()
                    false_reg = idc.print_operand(prev_ea, 2).strip().upper()
                    true_state = find_prev_mov_imm(prev_ea, true_reg, start_ea)
                    false_state = find_prev_mov_imm(prev_ea, false_reg, start_ea)
                    results.append({
                        "block_ea": block_ea,
                        "true_state": true_state,
                        "false_state": false_state,
                    })
        ea += size
    return results
block_ea_2_next_state_list = find_block_ea_2_next_state_list(start_ea, end_ea, dispatcher_ea, state_reg)
for block_ea_2_next_state in block_ea_2_next_state_list:
if "next_state" in block_ea_2_next_state:
print("block_ea=0x{:X} -> next_state=0x{:X}".format(
[培训]科锐软件逆向54期预科班、正式班开始火爆招生报名啦!!!
最后于 2025-9-25 18:08
被GhHei编辑
,原因: 纠错