-
-
[原创]LLVM Pass编写及去除——虚假控制流
-
发表于: 3天前 767
-
编写
虚假控制流使用大量不透明谓词组成恒真或恒假的算式,每个基本块之间用条件跳转连接,用不可达基本块和条件跳转达成混淆的目的
与控制流平坦化相比,它的cfg更加细长

其中会有多个if语句,也可能掺杂一些函数以加大混淆难度,这些函数往往返回定值
具体代码如下
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "SplitBasicBlock.h"
#include "Utils.h"
#include <cstdlib>
#include <ctime>
#include <memory>
#include <vector>
using std::vector;
using namespace llvm;
// Command-line option "bcf_loop" (default 1): how many rounds of bogus-control-flow
// insertion runOnFunction applies to each function.
static cl::opt<int> obfuTimes("bcf_loop",cl::init(1),cl::desc("obf function bcf_loop times"));
namespace{
class BogusControlFlow : public FunctionPass{
public:
static char ID;
BogusControlFlow() : FunctionPass(ID){
srand(time(0));
}
bool runOnFunction(Function &F);
void bogus(BasicBlock *BB);
Value* createBogusCmp(BasicBlock *insertAfter);
Value* createBogusCmp2(BasicBlock *insertAfter);
Function* createBogusCall(Module *M,int constVal,StringRef funcName);
};
}
/// Pass entry point. Runs the basic-block splitting pass on F once, then applies
/// bogus() to every basic block of F, repeating that for obfuTimes rounds.
/// Always returns true (the function is reported as modified).
bool BogusControlFlow::runOnFunction(Function &F)
{
    // INIT_CONTEXT comes from Utils.h; presumably it sets up the CONTEXT used by the
    // other helpers of this pass -- confirm against Utils.h.
    INIT_CONTEXT(F);

    // The split pass is created only for this one call. Owning it through unique_ptr
    // releases it on return; the previous raw pointer was never deleted.
    std::unique_ptr<FunctionPass> splitPass(createSplitBasicBlockPass());
    splitPass->runOnFunction(F);

    for (int round = 0; round < obfuTimes; ++round)
    {
        // Snapshot the current blocks first: bogus() adds new blocks to F, and those
        // must not be visited within the same round.
        vector<BasicBlock *> origBB;
        for (BasicBlock &BB : F)
        {
            origBB.push_back(&BB);
        }
        for (BasicBlock *BB : origBB)
        {
            bogus(BB);
        }
    }
    return true;
}
// Returns the function named funcName from module M. If the module has no such
// function yet, one is created first: no parameters, i32 return type, private
// linkage, and a single unnamed block that returns the constant constVal.
// An already existing function is returned unchanged, whatever it returns.
Function* BogusControlFlow :: createBogusCall(Module *M,int constVal,StringRef funcName)
{
    if (Function *existing = M->getFunction(funcName))
    {
        return existing;
    }

    IntegerType *i32Ty = Type::getInt32Ty(*CONTEXT);
    FunctionType *signature = FunctionType::get(i32Ty, false);
    Function *created = Function::Create(signature, GlobalValue::PrivateLinkage, funcName, M);

    // The whole body is one block holding `ret i32 constVal`.
    BasicBlock *body = BasicBlock::Create(*CONTEXT, "", created);
    ReturnInst::Create(*CONTEXT, ConstantInt::get(i32Ty, constVal), body);
    return created;
}
// First opaque predicate. Appends the computation of
//     y < 10 || x * (x + 1) % 2 == 0
// to the end of block insertAfter and returns the resulting value.
// x and y are two i32 globals created by this call with initial value CONST_I32(0);
// the literal 1 is obtained through a call to the helper function "return_1".
// The second operand holds for every x, because a product of two consecutive
// integers is even.
Value* BogusControlFlow :: createBogusCmp(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    // A new pair of globals is created on every call.
    GlobalVariable *gx = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "x");
    GlobalVariable *gy = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "y");

    LoadInst *xVal = new LoadInst(TYPE_I32, gx, "", insertAfter);
    LoadInst *yVal = new LoadInst(TYPE_I32, gy, "", insertAfter);

    // y < 10
    ICmpInst *yBelowTen = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, yVal, CONST_I32(10));

    // x + return_1()
    Function *returnOne = createBogusCall(mod, 1, "return_1");
    CallInst *one = CallInst::Create(returnOne, "", insertAfter);
    BinaryOperator *xPlusOne = BinaryOperator::CreateAdd(xVal, one, "", insertAfter);

    // x * (x + 1) % 2 == 0
    BinaryOperator *product = BinaryOperator::CreateMul(xVal, xPlusOne, "", insertAfter);
    BinaryOperator *parity = BinaryOperator::CreateSRem(product, CONST_I32(2), "", insertAfter);
    ICmpInst *isEven = new ICmpInst(*insertAfter, CmpInst::ICMP_EQ, parity, CONST_I32(0));

    // Bitwise OR of the two comparison results.
    return BinaryOperator::CreateOr(yBelowTen, isEven, "", insertAfter);
}
// Second opaque predicate. Appends the computation of
//     x * 2 < 9 && (y + 1) * 3 < 10
// to the end of block insertAfter and returns the resulting value.
// x and y are two i32 globals created by this call with initial value CONST_I32(0);
// as long as both still hold 0 the expression is 0 < 9 && 3 < 10, i.e. true.
Value* BogusControlFlow :: createBogusCmp2(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    // A new pair of globals is created on every call.
    GlobalVariable *gx = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "x");
    GlobalVariable *gy = new GlobalVariable(*mod, TYPE_I32, false, GlobalValue::CommonLinkage, CONST_I32(0), "y");

    LoadInst *xVal = new LoadInst(TYPE_I32, gx, "", insertAfter);
    LoadInst *yVal = new LoadInst(TYPE_I32, gy, "", insertAfter);

    // x * 2 < 9
    BinaryOperator *xDoubled = BinaryOperator::CreateMul(xVal, CONST_I32(2), "", insertAfter);
    ICmpInst *lhs = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, xDoubled, CONST_I32(9));

    // (y + 1) * 3 < 10
    BinaryOperator *yPlusOne = BinaryOperator::CreateAdd(yVal, CONST_I32(1), "", insertAfter);
    BinaryOperator *yTripled = BinaryOperator::CreateMul(yPlusOne, CONST_I32(3), "", insertAfter);
    ICmpInst *rhs = new ICmpInst(*insertAfter, CmpInst::ICMP_SLT, yTripled, CONST_I32(10));

    // Bitwise AND of the two comparison results.
    return BinaryOperator::CreateAnd(lhs, rhs, "", insertAfter);
}
// Wraps one basic block in bogus control flow.
// The block is cut into three parts: entryBB (keeps the leading PHI nodes), a body
// block, and a tail block that holds only the original terminator. A clone of the
// body is added as a decoy. The resulting edges are:
//   entryBB -> body   if predicate 1 holds, otherwise -> decoy
//   body    -> tail   if predicate 2 holds, otherwise -> decoy
//   decoy   -> body   unconditionally
void BogusControlFlow :: bogus(BasicBlock *entryBB)
{
    // Split at the first non-PHI instruction: everything from there on becomes the body.
    BasicBlock *body = entryBB->splitBasicBlock(entryBB->getFirstNonPHI(), "bodyBB");
    // Split again at the terminator, so the tail contains just that terminator.
    BasicBlock *tail = body->splitBasicBlock(body->getTerminator(), "endBB");
    // Decoy copy of the body; it is only entered through the "false" edges below.
    BasicBlock *decoy = createCloneBasicBlock(body);

    // Remove the existing terminators of the three blocks so the predicates and the
    // new branches can be appended at their ends.
    entryBB->getTerminator()->eraseFromParent();
    body->getTerminator()->eraseFromParent();
    decoy->getTerminator()->eraseFromParent();

    // Opaque predicates, emitted at the end of entryBB and body respectively.
    Value *entryCond = createBogusCmp(entryBB);
    Value *bodyCond = createBogusCmp2(body);

    // Wire up the conditional edges and the decoy's jump back into the body.
    BranchInst::Create(body, decoy, entryCond, entryBB);
    BranchInst::Create(tail, decoy, bodyCond, body);
    BranchInst::Create(body, decoy);
}
// Definition of the static pass identifier declared in the class.
char BogusControlFlow::ID = 0;
// Registers the pass under the name "bcf" with the description string below.
static RegisterPass<BogusControlFlow> X("bcf","BogusControlFlow a function");
去除
例:re
题目来自ciscn2025初赛

典型的虚假控制流特征,里面出现了大量不透明谓词,先进行重命名方便分析

可以发现,除了不透明谓词之外夹杂着一些函数增大混淆难度,有些函数返回定值,有些返回参数。
接下来讲解去除的两种方式
1.常量替换

根据Windows x64函数调用约定,a1参数通过rcx传递,a2参数通过rdx传递,而函数的返回值存放在rax中。所以对于所有函数替换,只需要将原来的call指令替换成对eax的赋值即可。
而对于不透明谓词,则是替换相应赋值的寄存器,直接改为常量。

需要注意的是指令长度:图中所做的修改,新指令长度均不超过原始指令。当新指令比原始指令短时,需要把多出来的无用字节nop掉;如果新指令比原始指令长,则不能直接原地替换,否则会覆盖后面的指令。
写idc脚本批量替换
#include <idc.idc>
// Overwrites Length bytes starting at Addr with 0x90 (x86 NOP).
// Nothing is written when Length is zero or negative.
static NopCode(Addr, Length)
{
    auto offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}
// Tests whether the instruction at current_addr matches a pattern.
//   my_insn_name - expected mnemonic
//   my_op1       - expected text of operand 0
//   my_op2       - expected text of operand 1
// Passing 0 for any of the three makes that field match anything.
// Returns 1 when all fields match, 0 otherwise.
// NOTE(review): the wildcard test is `arg == 0`; this relies on IDC not treating a
// non-numeric string argument as equal to 0 -- confirm against the IDC conversion rules.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first = print_operand(current_addr,0);
    auto second = print_operand(current_addr,1);

    // Reject on the first field that is neither a wildcard nor an exact match.
    if (my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if (my_op1 != 0 && first != my_op1)
    {
        return 0;
    }
    if (my_op2 != 0 && second != my_op2)
    {
        return 0;
    }
    return 1;
}
// Maps the operand text "cs:_0" .. "cs:_9" to the number 0 .. 9.
// Any other text falls through without an explicit return statement.
static getValue(data)
{
    if (data == "cs:_0") return 0;
    if (data == "cs:_1") return 1;
    if (data == "cs:_2") return 2;
    if (data == "cs:_3") return 3;
    if (data == "cs:_4") return 4;
    if (data == "cs:_5") return 5;
    if (data == "cs:_6") return 6;
    if (data == "cs:_7") return 7;
    if (data == "cs:_8") return 8;
    if (data == "cs:_9") return 9;
}
// Replaces the 7-byte instruction at asm_addr with `mov reg, imm32` (5 bytes)
// followed by two NOPs.
//   asm_addr - address of the instruction to overwrite
//   reg      - destination register text; only "eax", "ecx" and "edx" are supported
//   data     - operand text ("cs:_N") that getValue() turns into the immediate
// Returns 0 without touching the database when reg is not supported. Previously the
// opcode variable stayed unset in that case and the bytes were patched anyway.
static patchMovData(asm_addr,reg,data)
{
    auto opcode;
    // 0xB8 + register number is the one-byte opcode of `mov r32, imm32`.
    if (reg == "eax")
    {
        opcode = 0xB8;
    }
    else if (reg == "ecx")
    {
        opcode = 0xB9;
    }
    else if (reg == "edx")
    {
        opcode = 0xBA;
    }
    else
    {
        msg("patchMovData: unsupported register %s at %X, skipped\n",reg,asm_addr);
        return 0;
    }
    PatchByte(asm_addr,opcode);
    PatchDword(asm_addr + 1,getValue(data));
    // The caller only passes movsx instructions; the remaining two bytes of the
    // original encoding are padded with NOPs.
    NopCode(asm_addr + 5,2);
}
// Constant-replacement pass over the instruction heads in [0x407E53, 0x408859):
//   * `movsx reg, cs:_N`  -> `mov reg, N` (via patchMovData)
//   * operand 0 "return_a2" -> bytes 89 D0 (mov eax, edx) + 3 NOPs
//   * operand 0 "return_a1" -> bytes 89 C8 (mov eax, ecx) + 3 NOPs
//   * operand 0 "return_1"  -> B8 01 00 00 00 (mov eax, 1)
static main()
{
    auto ea = 0x407E53;
    auto end_addr = 0x408859;
    while (ea != BADADDR && ea < end_addr)
    {
        // movsx reg, cs:_N : the second operand is made a wildcard for checkAsm only
        // when it refers to one of the cs:_N variables; otherwise the pattern value 1
        // is passed as the expected operand.
        auto src = print_operand(ea,1);
        auto op2_pattern = 1;
        if (strstr(src,"cs:_") != -1)
        {
            op2_pattern = 0;
        }
        if (checkAsm(ea,"movsx",0,op2_pattern))
        {
            msg("mov data asm addr : %X\n",ea);
            patchMovData(ea,print_operand(ea,0),src);
        }

        // The three helper-call replacements are keyed on the first operand's text.
        auto target = print_operand(ea,0);
        if (target == "return_a2")
        {
            msg("return a2 func : %X\n",ea);
            PatchWord(ea,0xD089);
            NopCode(ea + 2,3);
        }
        else if (target == "return_a1")
        {
            msg("return a1 func : %X\n",ea);
            PatchWord(ea,0xC889);
            NopCode(ea + 2,3);
        }
        else if (target == "return_1")
        {
            msg("return 1 func : %X\n",ea);
            PatchByte(ea,0xB8);
            PatchDword(ea + 1,1);
        }
        ea = next_head(ea, end_addr);
    }
}
去混淆效果如下

去除非常完美
2.条件解析
一般来说,编写恒真恒假条件时不会编写太多完全不同的条件,而是对一个条件进行变化衍生出其他条件

图中这四个算式虽然不同,但它们本质上都是随机数1 * (随机数2 * 0)这样的式子,结果恒为0,只需要匹配它们汇编代码的相同部分,再patch条件跳转即可

示例idc脚本如下
#include <idc.idc>
// Overwrites Length bytes starting at Addr with 0x90 (x86 NOP).
// Nothing is written when Length is zero or negative.
static NopCode(Addr, Length)
{
    auto offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}
// Scans instruction heads from current_addr towards end_addr and returns the address
// of the first one whose mnemonic equals jmp_type.
// When none is found, the value returned is whatever address ended the scan
// (BADADDR, or an address that is not below end_addr).
static findJmpAddr(current_addr,end_addr,jmp_type)
{
    while (current_addr != BADADDR && current_addr < end_addr
           && print_insn_mnem(current_addr) != jmp_type)
    {
        current_addr = next_head(current_addr,end_addr);
    }
    return current_addr;
}
// Tests whether the instruction at current_addr matches a pattern.
//   my_insn_name - expected mnemonic
//   my_op1       - expected text of operand 0
//   my_op2       - expected text of operand 1
// Passing 0 for any of the three makes that field match anything.
// Returns 1 when all fields match, 0 otherwise.
// NOTE(review): the wildcard test is `arg == 0`; this relies on IDC not treating a
// non-numeric string argument as equal to 0 -- confirm against the IDC conversion rules.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first = print_operand(current_addr,0);
    auto second = print_operand(current_addr,1);

    // Reject on the first field that is neither a wildcard nor an exact match.
    if (my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if (my_op1 != 0 && first != my_op1)
    {
        return 0;
    }
    if (my_op2 != 0 && second != my_op2)
    {
        return 0;
    }
    return 1;
}
// Pattern-based removal over the instruction heads in [0x407E53, 0x408859).
// Two obfuscation shapes are recognised:
//   1. `movsx ?, cs:_3` followed by `movsx ?, cs:_6`
//      (data * (data * 0 + 6 + 3 - 9) == 0): everything up to the next jz is NOPed
//      and the jz is rewritten into an unconditional jmp.
//   2. `movsx ?, cs:_0`, `cmp`, then `jbe` or `jge` (comparison against 0):
//      for jbe the three instructions are NOPed; for jge the jump becomes
//      unconditional and the 5 bytes in front of the sequence are NOPed.
// The jz search result is now validated before anything is patched; previously a
// failed search fed its not-found value straight into the patch calls.
static main()
{
    auto current_addr = 0x407E53;
    auto end_addr = 0x408859;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto current_res;
        auto next_addr;
        auto next_res;
        auto next_next_addr;
        auto next_next_res;
        // data * (data * 0 + 6 + 3 - 9) = 0
        current_res = checkAsm(current_addr,"movsx",0,"cs:_3");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"movsx",0,"cs:_6");
        if(current_res && next_res)
        {
            auto jz_addr = findJmpAddr(current_addr,end_addr,"jz");
            if(jz_addr != BADADDR && jz_addr < end_addr)
            {
                NopCode(current_addr,jz_addr - current_addr);
                // NOP + 0xE9 over the first two bytes turns the jump into
                // `nop; jmp rel32` with the same displacement.
                // NOTE(review): assumes the 6-byte near form (0F 84 rel32) -- confirm.
                NopCode(jz_addr,1);
                PatchByte(jz_addr + 1,0xE9);
                msg("jz addr : %X\n",jz_addr);
            }
            else
            {
                msg("no jz found after %X, pattern left unpatched\n",current_addr);
            }
        }
        // cmp 0,data
        current_res = checkAsm(current_addr,"movsx",0,"cs:_0");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"cmp",0,0);
        next_next_addr = next_head(next_addr,end_addr);
        next_next_res = checkAsm(next_next_addr,"jbe",0,0);
        if(current_res && next_res && next_next_res)
        {
            // Remove movsx / cmp / jbe entirely, up to the following instruction.
            auto nop_addr = next_head(next_next_addr,end_addr);
            NopCode(current_addr,nop_addr - current_addr);
            msg("cmp 0 addr : %X\n",next_next_addr);
        }
        next_next_res = checkAsm(next_next_addr,"jge",0,0);
        if(current_res && next_res && next_next_res)
        {
            // Same NOP + 0xE9 rewrite as for jz above (same 6-byte assumption).
            NopCode(next_next_addr,1);
            PatchByte(next_next_addr + 1,0xE9);
            // NOTE(review): also wipes the 5 bytes preceding the movsx; presumably a
            // call instruction in this particular binary -- confirm.
            NopCode(current_addr - 5,5);
            msg("cmp 0 addr : %X\n",next_next_addr);
        }
        current_addr = next_head(current_addr,end_addr);
    }
}
这个idc脚本解析了上述随机数1 * (随机数2 * 0)的算式和随机数与0比较的算式,在替换方法效果很好的情况下,为什么还要使用这种方法呢
例:hello-obf
题目来自lilctf2025
这个题同样是不透明谓词 + 返回定值函数的虚假控制流,先使用常量替换的方式去除
#include <idc.idc>
// Overwrites Length bytes starting at Addr with 0x90 (x86 NOP).
// Nothing is written when Length is zero or negative.
static NopCode(Addr, Length)
{
    auto offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}
// Replaces the 5 bytes at current_addr with `mov eax, N` (B8 imm32), where N is the
// constant belonging to the helper named in num.
//   num          - operand text; "return_7", "return_2", "return_6", "return_3" and
//                  "return_9" are recognised
//   current_addr - address of the instruction to overwrite
// Returns 0 without patching when num is not one of the recognised names. Previously
// the opcode byte was written before the name was checked and the immediate came
// from an unset variable.
static PatchNumFunc(num,current_addr)
{
    // -1 marks "not recognised"; every real constant is a single digit.
    auto patch_num = -1;
    if(num == "return_7")
    {
        patch_num = 7;
    }
    else if(num == "return_2")
    {
        patch_num = 2;
    }
    else if(num == "return_6")
    {
        patch_num = 6;
    }
    else if(num == "return_3")
    {
        patch_num = 3;
    }
    else if(num == "return_9")
    {
        patch_num = 9;
    }
    if(patch_num == -1)
    {
        msg("unknown helper %s at %X, left unpatched\n",num,current_addr);
        return 0;
    }
    msg("call return num addr : %X\n",current_addr);
    PatchByte(current_addr,0xB8);
    PatchDword(current_addr + 1,patch_num);
}
// Maps the operand text "cs:_0" .. "cs:_9" to the number 0 .. 9.
// Any other text falls through without an explicit return statement.
static getValue(data)
{
    if (data == "cs:_0") return 0;
    if (data == "cs:_1") return 1;
    if (data == "cs:_2") return 2;
    if (data == "cs:_3") return 3;
    if (data == "cs:_4") return 4;
    if (data == "cs:_5") return 5;
    if (data == "cs:_6") return 6;
    if (data == "cs:_7") return 7;
    if (data == "cs:_8") return 8;
    if (data == "cs:_9") return 9;
}
// Rewrites the instruction at current_addr as `mov reg, imm32` (5 bytes) and pads the
// rest of the original instruction with NOPs.
//   num          - operand text ("cs:_N") that getValue() turns into the immediate
//   current_addr - address of the instruction to overwrite; its first operand names
//                  the destination register ("eax", "ecx" or "edx")
// Returns 0 without patching when the register is not supported or the instruction is
// shorter than 5 bytes. Two fixes over the earlier version: an unsupported register
// no longer results in an unset opcode byte being written, and the padding is derived
// from the real instruction length instead of a fixed 2 bytes (the caller does not
// check the mnemonic, so the length is not known to be 7).
static PatchNum(num,current_addr)
{
    msg("mov reg num addr : %X\n",current_addr);
    auto reg = print_operand(current_addr,0);
    auto reg_byte;
    // 0xB8 + register number is the one-byte opcode of `mov r32, imm32`.
    if(reg == "eax")
    {
        reg_byte = 0xB8;
    }
    else if(reg == "ecx")
    {
        reg_byte = 0xB9;
    }
    else if(reg == "edx")
    {
        reg_byte = 0xBA;
    }
    else
    {
        msg("unsupported register %s at %X, left unpatched\n",reg,current_addr);
        return 0;
    }
    auto insn_len = get_item_size(current_addr);
    if(insn_len < 5)
    {
        // A 5-byte replacement would run into the next instruction.
        msg("instruction at %X is only %d bytes, left unpatched\n",current_addr,insn_len);
        return 0;
    }
    PatchByte(current_addr,reg_byte);
    PatchDword(current_addr + 1,getValue(num));
    NopCode(current_addr + 5,insn_len - 5);
}
// Constant-replacement pass over the instruction heads in [0x1400217E8, 0x1400260C2):
//   * first operand containing "return_"  -> PatchNumFunc
//   * second operand containing "cs:_"    -> PatchNum
static main()
{
    auto end_addr = 0x1400260C2;
    auto ea;
    for (ea = 0x1400217E8; ea != BADADDR && ea < end_addr; ea = next_head(ea, end_addr))
    {
        // call return_N
        auto callee = print_operand(ea,0);
        if (strstr(callee,"return_") != -1)
        {
            PatchNumFunc(callee,ea);
        }
        // mov reg, cs:_N
        auto src = print_operand(ea,1);
        if (strstr(src,"cs:_") != -1)
        {
            PatchNum(src,ea);
        }
    }
}
效果如下

其中这行算式并没有被ida优化掉,因为使用了浮点数运算。发现每行算式都有固定的1 * (7 - (3 + 3))式子,这个时候用条件解析会去除更加干净
观察发现是将eax放入栈中,匹配混淆段特征并写脚本去除


#include <idc.idc>
// Overwrites Length bytes starting at Addr with 0x90 (x86 NOP).
// Nothing is written when Length is zero or negative.
static NopCode(Addr, Length)
{
    auto offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}
// Tests whether the instruction at current_addr matches a pattern.
//   my_insn_name - expected mnemonic
//   my_op1       - expected text of operand 0
//   my_op2       - expected text of operand 1
// Passing 0 for any of the three makes that field match anything.
// Returns 1 when all fields match, 0 otherwise.
// NOTE(review): the wildcard test is `arg == 0`; this relies on IDC not treating a
// non-numeric string argument as equal to 0 -- confirm against the IDC conversion rules.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first = print_operand(current_addr,0);
    auto second = print_operand(current_addr,1);

    // Reject on the first field that is neither a wildcard nor an exact match.
    if (my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if (my_op1 != 0 && first != my_op1)
    {
        return 0;
    }
    if (my_op2 != 0 && second != my_op2)
    {
        return 0;
    }
    return 1;
}
// Scans instruction heads from current_addr towards end_addr and returns the address
// of the first `movsd <anything>, xmm0`.
// Returns BADADDR when no such instruction is found; previously the function ended
// without a return statement in that case.
static findMovStack(current_addr,end_addr)
{
    while(current_addr != BADADDR && current_addr < end_addr)
    {
        if(checkAsm(current_addr,"movsd",0,"xmm0"))
        {
            return current_addr;
        }
        current_addr = next_head(current_addr,end_addr);
    }
    return BADADDR;
}
// Scans instruction heads from current_addr towards end_addr and returns the address
// of the first `cvttsd2si eax, xmm0`.
// Returns BADADDR when no such instruction is found; previously the function ended
// without a return statement in that case.
static findEnd(current_addr,end_addr)
{
    while(current_addr != BADADDR && current_addr < end_addr)
    {
        if(checkAsm(current_addr,"cvttsd2si","eax","xmm0"))
        {
            return current_addr;
        }
        current_addr = next_head(current_addr,end_addr);
    }
    return BADADDR;
}
// Pattern-based removal of the floating-point obfuscation over the instruction heads
// in [0x1400217E8, 0x1400260C2).
// A sequence is recognised by `sub eax, ?` / `pxor xmm0, xmm0` / `cvtsi2sd xmm0, eax`.
// For each hit:
//   * the `movsd <stack>, xmm0` store is turned into a NOP followed by bytes 48 89
//     in place of the next two opcode bytes (a 64-bit integer store that keeps the
//     original addressing bytes),
//   * everything between the pxor and that store is NOPed,
//   * everything after the store up to and including `cvttsd2si eax, xmm0` is NOPed.
// Both searches are validated before any byte is changed, so a sequence whose store
// or end marker cannot be found inside the range is left untouched. Previously the
// search results were used unchecked.
static main()
{
    auto current_addr = 0x1400217E8;
    auto end_addr = 0x1400260C2;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto sub_eax = checkAsm(current_addr,"sub","eax",0);
        auto pxor_addr = next_head(current_addr,end_addr);
        auto pxor_xmm0 = checkAsm(pxor_addr,"pxor","xmm0","xmm0");
        auto cvtsi2sd_addr = next_head(pxor_addr,end_addr);
        auto cvtsi2sd_xmm0 = checkAsm(cvtsi2sd_addr,"cvtsi2sd","xmm0","eax");
        if(sub_eax && pxor_xmm0 && cvtsi2sd_xmm0)
        {
            msg("addr : %X\n",current_addr);
            // NOTE(review): the +12 skip is specific to this binary's instruction
            // layout -- confirm before reusing the script elsewhere.
            auto mov_stack_addr = findMovStack(cvtsi2sd_addr + 12,end_addr);
            // Range checks reject BADADDR as well as any other not-found value.
            auto store_found = (mov_stack_addr > cvtsi2sd_addr && mov_stack_addr < end_addr);
            auto cvttsd2si_addr = BADADDR;
            if(store_found)
            {
                cvttsd2si_addr = findEnd(mov_stack_addr,end_addr);
            }
            if(store_found && cvttsd2si_addr > mov_stack_addr && cvttsd2si_addr < end_addr)
            {
                NopCode(mov_stack_addr,1);
                PatchWord(mov_stack_addr + 1,0x8948);
                msg("patch addr : %X\n",mov_stack_addr);
                auto next_addr = next_head(mov_stack_addr,end_addr);
                auto obf_end_addr = next_head(cvttsd2si_addr,end_addr);
                msg("end addr : %X\n",obf_end_addr);
                NopCode(pxor_addr,mov_stack_addr - pxor_addr);
                NopCode(next_addr,obf_end_addr - next_addr);
            }
            else
            {
                msg("incomplete pattern at %X, left unpatched\n",current_addr);
            }
        }
        current_addr = next_head(current_addr, end_addr);
    }
}
效果如下

这个例子其实不是举得特别好,但是我没找到其他去不掉的文件,条件解析比常量替换要麻烦很多,所以这里只是示例,提供另一种去除思路
总结
虚假控制流主要由恒真恒假跳转控制程序走向,其中夹杂不可达块与无逻辑函数以达成混淆目的。最简单去除方式就是替换不透明谓词为对寄存器赋值,在特定情况下,由于混淆部分代码大部分是相同的,也可以考虑解析特征整块去除。
如果大家有其他思路或者有趣的实例欢迎向我分享!
[培训]《冰与火的战歌:Windows内核攻防实战》!从零到实战,融合AI与Windows内核攻防全技术栈,打造具备自动化能力的内核开发高手。