首页
社区
课程
招聘
[原创]LLVM Pass编写及去除——虚假控制流
发表于: 3天前 767

[原创]LLVM Pass编写及去除——虚假控制流

3天前
767

编写

虚假控制流使用大量不透明谓词组成恒真或恒假的算式,每个基本块之间用条件跳转连接,用不可达基本块和条件跳转达成混淆的目的

与控制流平坦化相比,它的cfg更加细长

其中会有多个if语句,也可能掺杂一些函数加大混淆难度,这些函数往往返回定值

具体代码如下

#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Instructions.h"
#include "SplitBasicBlock.h"
#include "Utils.h"
#include <vector>
#include <cstdlib>
#include <ctime>
using std::vector;
using namespace llvm;

// Command-line option `-bcf_loop` (default 1): the number of times
// runOnFunction repeats the bogus-control-flow transformation on a function.
static cl::opt<int> obfuTimes(
    "bcf_loop",
    cl::init(1),
    cl::desc("obf function bcf_loop times"));

namespace{
    class BogusControlFlow : public FunctionPass{
        public:
            static char ID;
            BogusControlFlow() : FunctionPass(ID){
                srand(time(0));
            }

            bool runOnFunction(Function &F);
            void bogus(BasicBlock *BB);

            Value* createBogusCmp(BasicBlock *insertAfter);
            Value* createBogusCmp2(BasicBlock *insertAfter);

            Function* createBogusCall(Module *M,int constVal,StringRef funcName);

    };
}

// Pass entry point: splits the function's basic blocks with the
// SplitBasicBlock pass, then applies bogus() to every block, repeating the
// whole sweep `obfuTimes` times. Always reports the function as modified.
bool BogusControlFlow::runOnFunction(Function &F)
{
    INIT_CONTEXT(F);
    // NOTE(review): the pass object returned here is never released in this
    // function; confirm who owns it before adding a delete.
    FunctionPass *pass = createSplitBasicBlockPass();
    pass->runOnFunction(F);
    for(int i = 0; i < obfuTimes; i++)
    {
        // Snapshot the current blocks first: bogus() adds new blocks to F,
        // so iterating F directly while transforming would be unsafe.
        vector<BasicBlock *> origBB;
        for(BasicBlock &BB : F)
        {
            origBB.push_back(&BB);
        }
        for(BasicBlock *BB : origBB)
        {
            bogus(BB);
        }
    }
    return true;
}

// Looks up the function `funcName` in module M and returns it if present.
// Otherwise builds a new private, zero-argument function with that name whose
// single basic block returns the i32 constant `constVal`.
// Note: when the function already exists it is returned as-is; `constVal` is
// not checked against it.
Function* BogusControlFlow :: createBogusCall(Module *M,int constVal,StringRef funcName)
{
    if(Function *existing = M->getFunction(funcName))
    {
        return existing;
    }

    // Signature: i32 (void), not variadic.
    FunctionType *signature = FunctionType::get(Type::getInt32Ty(*CONTEXT),false);
    Function *helper = Function::Create(signature,GlobalValue::PrivateLinkage,funcName,M);

    // The body is a single block containing only `ret i32 constVal`.
    BasicBlock *body = BasicBlock::Create(*CONTEXT,"",helper);
    ReturnInst::Create(*CONTEXT,ConstantInt::get(Type::getInt32Ty(*CONTEXT),constVal),body);
    return helper;
}

// Opaque predicate #1, appended to the end of `insertAfter`:
//     y < 10 || x * (x + return_1()) % 2 == 0
// x and y are two freshly created i32 globals initialised to 0. The right-hand
// side is always true because the product of two consecutive integers is even.
// Returns the i1 value of the final `or`.
Value* BogusControlFlow :: createBogusCmp(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    // Two new non-constant globals, common linkage, initial value 0.
    GlobalVariable *globalX = new GlobalVariable(*mod,TYPE_I32,false,GlobalValue::CommonLinkage,CONST_I32(0),"x");
    GlobalVariable *globalY = new GlobalVariable(*mod,TYPE_I32,false,GlobalValue::CommonLinkage,CONST_I32(0),"y");

    // Read their current values.
    LoadInst *xVal = new LoadInst(TYPE_I32,globalX,"",insertAfter);
    LoadInst *yVal = new LoadInst(TYPE_I32,globalY,"",insertAfter);

    // Left operand of the `or`: y < 10 (signed).
    ICmpInst *yBelowTen = new ICmpInst(*insertAfter,CmpInst::ICMP_SLT,yVal,CONST_I32(10));

    // The literal 1 is hidden behind a call to the helper "return_1".
    CallInst *one = CallInst::Create(createBogusCall(mod,1,"return_1"),"",insertAfter);

    // x * (x + 1) % 2
    BinaryOperator *xPlusOne = BinaryOperator::CreateAdd(xVal,one,"",insertAfter);
    BinaryOperator *product = BinaryOperator::CreateMul(xVal,xPlusOne,"",insertAfter);
    BinaryOperator *parity = BinaryOperator::CreateSRem(product,CONST_I32(2),"",insertAfter);

    // Right operand of the `or`: parity == 0.
    ICmpInst *isEven = new ICmpInst(*insertAfter,CmpInst::ICMP_EQ,parity,CONST_I32(0));

    Value *predicate = BinaryOperator::CreateOr(yBelowTen,isEven,"",insertAfter);
    return predicate;
}

// Opaque predicate #2, appended to the end of `insertAfter`:
//     x * 2 < 9 && (y + 1) * 3 < 10
// x and y are two freshly created i32 globals initialised to 0, so the
// expression is true as long as both still hold that initial value.
// Returns the i1 value of the final `and`.
Value* BogusControlFlow :: createBogusCmp2(BasicBlock *insertAfter)
{
    Module *mod = insertAfter->getModule();

    // Two new non-constant globals, common linkage, initial value 0.
    GlobalVariable *globalX = new GlobalVariable(*mod,TYPE_I32,false,GlobalValue::CommonLinkage,CONST_I32(0),"x");
    GlobalVariable *globalY = new GlobalVariable(*mod,TYPE_I32,false,GlobalValue::CommonLinkage,CONST_I32(0),"y");
    LoadInst *xVal = new LoadInst(TYPE_I32,globalX,"",insertAfter);
    LoadInst *yVal = new LoadInst(TYPE_I32,globalY,"",insertAfter);

    // Left operand: x * 2 < 9 (signed).
    BinaryOperator *xTimesTwo = BinaryOperator::CreateMul(xVal,CONST_I32(2),"",insertAfter);
    ICmpInst *leftHolds = new ICmpInst(*insertAfter,CmpInst::ICMP_SLT,xTimesTwo,CONST_I32(9));

    // Right operand: (y + 1) * 3 < 10 (signed).
    BinaryOperator *yPlusOne = BinaryOperator::CreateAdd(yVal,CONST_I32(1),"",insertAfter);
    BinaryOperator *tripled = BinaryOperator::CreateMul(yPlusOne,CONST_I32(3),"",insertAfter);
    ICmpInst *rightHolds = new ICmpInst(*insertAfter,CmpInst::ICMP_SLT,tripled,CONST_I32(10));

    Value *predicate = BinaryOperator::CreateAnd(leftHolds,rightHolds,"",insertAfter);
    return predicate;
}

// Rewrites one basic block into the shape
//     entryBB --cond1--> bodyBB --cond2--> endBB
// with the false edge of both conditions going to cloneBB, a copy of bodyBB
// that jumps back to bodyBB. cond1/cond2 are the opaque predicates built by
// createBogusCmp / createBogusCmp2, so cloneBB is intended never to execute.
void BogusControlFlow :: bogus(BasicBlock *entryBB)
{
    // Split at the first non-PHI instruction: entryBB keeps only its PHI
    // nodes, everything else moves to bodyBB.
    BasicBlock *bodyBB = entryBB->splitBasicBlock(entryBB->getFirstNonPHI(),"bodyBB");
    // Split again at the terminator: endBB holds only the original terminator.
    BasicBlock *endBB = bodyBB->splitBasicBlock(bodyBB->getTerminator(),"endBB");
    // Bogus copy of the body (helper from Utils.h; presumably also remaps the
    // cloned instructions' operands — confirm against its definition).
    BasicBlock *cloneBB = createCloneBasicBlock(bodyBB);

    // Drop the unconditional branches left by the splits (and the cloned
    // one) so the blocks can be given new terminators below.
    bodyBB->getTerminator()->eraseFromParent();
    cloneBB->getTerminator()->eraseFromParent();
    entryBB->getTerminator()->eraseFromParent();

    // Append the opaque predicates to the now terminator-less blocks.
    Value *cond1 = createBogusCmp(entryBB);
    Value *cond2 = createBogusCmp2(bodyBB);

    // entryBB: cond1 ? bodyBB : cloneBB
    BranchInst::Create(bodyBB,cloneBB,cond1,entryBB);
    // bodyBB:  cond2 ? endBB : cloneBB
    BranchInst::Create(endBB,cloneBB,cond2,bodyBB);
    // cloneBB: unconditional jump back to bodyBB
    BranchInst::Create(bodyBB,cloneBB);
}

// Storage for the pass identifier; the legacy pass manager uses the address
// of this variable, not its value.
char BogusControlFlow::ID = 0;
// Registers the pass under the command-line name "bcf".
static RegisterPass<BogusControlFlow> X("bcf","BogusControlFlow a function");

去除

例:re

题目来自ciscn2025初赛

典型的虚假控制流特征,里面出现了大量不透明谓词,先进行重命名方便分析

可以发现,除了不透明谓词之外夹杂着一些函数增大混淆难度,有些函数返回定值,有些返回参数。

接下来讲解去除的两种方式

1.常量替换

根据Windows x64函数调用约定,a1参数通过rcx传递,a2参数通过rdx传递,而函数的返回值存放在rax中。所以对于所有函数替换,只需要将原来的call指令替换成对eax的赋值即可。

而对于不透明谓词,则是替换相应赋值的寄存器,直接改为常量。

需要注意的是指令长度,图中所做的改变均比原始指令长度短,假如指令长度超过原始字节,则需要考虑nop掉无用字节进行修改。

写idc脚本批量替换

#include <idc.idc>

// Overwrites Length consecutive bytes, starting at Addr, with 0x90
// (the single-byte x86 NOP).
static NopCode(Addr, Length)
{
    auto offset;
    offset = 0;
    while (offset < Length)
    {
        PatchByte(Addr + offset, 0x90);
        offset = offset + 1;
    }
}

// Tests whether the instruction at current_addr matches a pattern made of a
// mnemonic and the text of operand 0 and operand 1.
// A pattern field that compares equal to 0 is a wildcard and always matches.
// NOTE(review): string patterns are compared against the integer 0 here; this
// relies on IDC's mixed-type comparison rules — confirm a non-numeric string
// never compares equal to 0.
// Returns 1 on a match, 0 otherwise.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first_op = print_operand(current_addr,0);
    auto second_op = print_operand(current_addr,1);

    // Reject on the first non-wildcard field that differs.
    if(my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if(my_op1 != 0 && first_op != my_op1)
    {
        return 0;
    }
    if(my_op2 != 0 && second_op != my_op2)
    {
        return 0;
    }
    return 1;
}

// Maps an operand string "cs:_0" .. "cs:_9" to the digit in its name.
// Any other string matches nothing and the function falls off the end
// without an explicit return value.
static getValue(data)
{
    auto digit;
    for (digit = 0; digit <= 9; digit++)
    {
        if(data == "cs:_" + ltoa(digit, 10))
        {
            return digit;
        }
    }
}

// Replaces the instruction at asm_addr (a load of one of the constant globals
// "cs:_N" into a 32-bit register) with `mov reg, imm32`, where imm32 is the
// value getValue() reports for `data`. Any bytes of the original instruction
// beyond the 5-byte replacement are NOP-ed.
//   asm_addr : address of the instruction to patch
//   reg      : destination register name as printed by IDA (e.g. "eax")
//   data     : source operand text (e.g. "cs:_3")
// Returns 0 without patching when the register is not supported or the
// original instruction is too short to hold the replacement.
static patchMovData(asm_addr,reg,data)
{
    auto value = getValue(data);
    // Measure the instruction before touching its bytes.
    auto insn_size = get_item_size(asm_addr);
    // Opcode of `mov r32, imm32` is 0xB8 + register number; -1 = unsupported.
    auto reg_data = -1;
    if(reg == "eax")
    {
        reg_data = 0xB8;
    }
    if(reg == "ecx")
    {
        reg_data = 0xB9;
    }
    if(reg == "edx")
    {
        reg_data = 0xBA;
    }
    if(reg == "ebx")
    {
        reg_data = 0xBB;
    }
    if(reg == "esi")
    {
        reg_data = 0xBE;
    }
    if(reg == "edi")
    {
        reg_data = 0xBF;
    }
    if(reg_data == -1)
    {
        // Previously an unset opcode byte was written here.
        msg("skip %X : unsupported register %s\n",asm_addr,reg);
        return 0;
    }
    if(insn_size < 5)
    {
        // The 5-byte replacement would spill into the next instruction.
        msg("skip %X : instruction shorter than 5 bytes\n",asm_addr);
        return 0;
    }
    PatchByte(asm_addr,reg_data);
    PatchDword(asm_addr + 1,value);
    // Pad the rest of the original instruction (2 bytes for the usual
    // 7-byte form).
    NopCode(asm_addr + 5,insn_size - 5);
}

// Entry point of the constant-replacement script. Walks every instruction
// head in the hard-coded range [0x407E53, 0x408859) and patches in place:
//   * movsx reg, cs:_N  -> mov reg, N            (via patchMovData)
//   * call return_a2    -> mov eax, edx + 3 NOPs
//   * call return_a1    -> mov eax, ecx + 3 NOPs
//   * call return_1     -> mov eax, 1
// The function names are the ones assigned during analysis of this sample.
static main()
{
    auto current_addr = 0x407E53;
    auto end_addr = 0x408859;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        // Load of an opaque constant: movsx reg, cs:_N
        auto data = print_operand(current_addr,1);
        // is_data doubles as the operand-1 pattern for checkAsm: 0 means
        // "wildcard" when the operand mentions cs:_, otherwise the integer 1.
        // NOTE(review): the non-wildcard case relies on operand text never
        // comparing equal to 1 — confirm.
        auto is_data = 1;
        if(strstr(data,"cs:_") != -1)
        {
            is_data = 0;
        }
        auto mov_data = checkAsm(current_addr,"movsx",0,is_data);
        if(mov_data)
        {
            msg("mov data asm addr : %X\n",current_addr);
            auto reg = print_operand(current_addr,0);
            patchMovData(current_addr,reg,data);
        }
        // call return_a2: result is the second argument.
        // Bytes 89 D0 encode `mov eax, edx`; the other 3 bytes are NOP-ed.
        auto return_a2_func = print_operand(current_addr,0);
        if(return_a2_func == "return_a2")
        {
            msg("return a2 func : %X\n",current_addr);
            PatchWord(current_addr,0xD089);
            NopCode(current_addr + 2,3);
        }
        // call return_a1: result is the first argument.
        // Bytes 89 C8 encode `mov eax, ecx`; the other 3 bytes are NOP-ed.
        auto return_a1_func = print_operand(current_addr,0);
        if(return_a1_func == "return_a1")
        {
            msg("return a1 func : %X\n",current_addr);
            PatchWord(current_addr,0xC889);
            NopCode(current_addr + 2,3);
        }       
        // call return_1: B8 01 00 00 00 encodes `mov eax, 1` (5 bytes).
        auto return_1_func = print_operand(current_addr,0);
        if(return_1_func == "return_1")
        {
            msg("return 1 func : %X\n",current_addr);
            PatchByte(current_addr,0xB8);
            PatchDword(current_addr + 1,1);
        }

        current_addr = next_head(current_addr, end_addr);
    }
}

去混淆效果如下

去除非常完美

2.条件解析

一般来说,编写恒真恒假条件时不会编写太多完全不同的条件,而是对一个条件进行变化衍生出其他条件

图中这四个算式虽然不同,但它们本质上都是随机数1 * (随机数2 * 0)这样的式子,结果恒为0,只需要匹配它们汇编代码的相同部分,再patch条件跳转即可

示例idc脚本如下

#include <idc.idc>

// Fills the Length bytes beginning at Addr with 0x90 (x86 NOP).
static NopCode(Addr, Length)
{
    auto pos;
    pos = 0;
    while (pos < Length)
    {
        PatchByte(Addr + pos, 0x90);
        pos = pos + 1;
    }
}

// Scans instruction heads from current_addr up to end_addr and returns the
// address of the first one whose mnemonic equals jmp_type.
// If none is found, returns the address at which the scan stopped (BADADDR
// or an address that is not below end_addr).
static findJmpAddr(current_addr,end_addr,jmp_type)
{
    auto ea;
    for (ea = current_addr; ea != BADADDR && ea < end_addr; ea = next_head(ea,end_addr))
    {
        if(print_insn_mnem(ea) == jmp_type)
        {
            return ea;
        }
    }
    return ea;
}

// Tests whether the instruction at current_addr matches a pattern made of a
// mnemonic and the text of operand 0 and operand 1.
// A pattern field that compares equal to 0 is a wildcard and always matches.
// NOTE(review): string patterns are compared against the integer 0 here; this
// relies on IDC's mixed-type comparison rules — confirm a non-numeric string
// never compares equal to 0.
// Returns 1 on a match, 0 otherwise.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first_op = print_operand(current_addr,0);
    auto second_op = print_operand(current_addr,1);

    // Reject on the first non-wildcard field that differs.
    if(my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if(my_op1 != 0 && first_op != my_op1)
    {
        return 0;
    }
    if(my_op2 != 0 && second_op != my_op2)
    {
        return 0;
    }
    return 1;
}

// Entry point of the condition-parsing script. Walks every instruction head
// in the hard-coded range [0x407E53, 0x408859) and removes two families of
// opaque predicates by recognising their instruction sequences:
//   1. movsx ?, cs:_3 ; movsx ?, cs:_6 ; ... ; jz  -> sequence NOP-ed, jz
//      turned into an unconditional jmp
//   2. movsx ?, cs:_0 ; cmp ; jbe                  -> whole sequence NOP-ed
//      movsx ?, cs:_0 ; cmp ; jge                  -> jge turned into jmp
// Every patch is guarded so that a failed search (BADADDR) or a short-form
// jump cannot cause bytes outside the matched sequence to be overwritten.
static main()
{
    auto current_addr = 0x407E53;
    auto end_addr = 0x408859;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto current_res;
        auto next_addr;
        auto next_res;
        auto next_next_addr;
        auto next_next_res;
        // Pattern 1: data * (data * 0 + 6 + 3 - 9) = 0
        current_res = checkAsm(current_addr,"movsx",0,"cs:_3");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"movsx",0,"cs:_6");
        if(current_res && next_res)
        {
            auto jz_addr = findJmpAddr(current_addr,end_addr,"jz");
            // Only the 6-byte near form (0F 84 rel32) can be rewritten as
            // NOP + E9 rel32 with the displacement left untouched.
            if(jz_addr != BADADDR && jz_addr < end_addr && get_item_size(jz_addr) == 6)
            {
                NopCode(current_addr,jz_addr - current_addr);
                NopCode(jz_addr,1);
                PatchByte(jz_addr + 1,0xE9);
                msg("jz addr : %X\n",jz_addr);
            }
            else
            {
                msg("skip %X : no 6-byte jz found\n",current_addr);
            }
        }
        // Pattern 2: comparison of a value against the constant 0
        current_res = checkAsm(current_addr,"movsx",0,"cs:_0");
        next_addr = next_head(current_addr,end_addr);
        next_res = checkAsm(next_addr,"cmp",0,0);
        next_next_addr = next_head(next_addr,end_addr);
        next_next_res = checkAsm(next_next_addr,"jbe",0,0);
        if(current_res && next_res && next_next_res)
        {
            // Wipe movsx/cmp/jbe entirely so execution falls through.
            auto nop_addr = next_head(next_next_addr,end_addr);
            if(nop_addr != BADADDR)
            {
                NopCode(current_addr,nop_addr - current_addr);
                msg("cmp 0 addr : %X\n",next_next_addr);
            }
            else
            {
                msg("skip %X : no instruction after jbe\n",next_next_addr);
            }
        }
        next_next_res = checkAsm(next_next_addr,"jge",0,0);
        if(current_res && next_res && next_next_res)
        {
            // Same near-jump requirement as above (0F 8D rel32).
            if(get_item_size(next_next_addr) == 6)
            {
                NopCode(next_next_addr,1);
                PatchByte(next_next_addr + 1,0xE9);
                // NOTE(review): wipes the 5 bytes before the movsx —
                // presumably a preceding 5-byte call; confirm in the sample.
                NopCode(current_addr - 5,5);
                msg("cmp 0 addr : %X\n",next_next_addr);
            }
            else
            {
                msg("skip %X : jge is not a 6-byte near jump\n",next_next_addr);
            }
        }

        current_addr = next_head(current_addr,end_addr);
    }
}

这个idc脚本解析了上述随机数1 * (随机数2 * 0)的算式和随机数与0比较的算式,在替换方法效果很好的情况下,为什么还要使用这种方法呢

例:hello-obf

题目来自lilctf2025

这个题同样是不透明谓词 + 返回定值函数的虚假控制流,先使用常量替换的方式去除

#include <idc.idc>

// Writes the x86 NOP byte (0x90) over Length bytes starting at Addr.
static NopCode(Addr, Length)
{
    auto done;
    done = 0;
    while (done < Length)
    {
        PatchByte(Addr + done, 0x90);
        done = done + 1;
    }
}

// Replaces a call to one of the constant-returning helpers with
// `mov eax, imm32` (opcode B8 followed by the constant).
//   num          : operand text naming the callee, e.g. "return_7"
//   current_addr : address of the call instruction
// Only the helper names listed below are handled. For any other name, or when
// the instruction is shorter than the 5-byte replacement, nothing is written
// and 0 is returned.
static PatchNumFunc(num,current_addr)
{
    auto insn_size = get_item_size(current_addr);
    // -1 marks "name not recognised".
    auto patch_num = -1;
    if(num == "return_7")
    {
        patch_num = 7;
    }
    if(num == "return_2")
    {
        patch_num = 2;
    }
    if(num == "return_6")
    {
        patch_num = 6;
    }
    if(num == "return_3")
    {
        patch_num = 3;
    }
    if(num == "return_9")
    {
        patch_num = 9;
    }
    if(patch_num == -1)
    {
        // Previously the opcode and an unset immediate were written anyway.
        msg("skip %X : unknown constant function %s\n",current_addr,num);
        return 0;
    }
    if(insn_size < 5)
    {
        msg("skip %X : instruction shorter than 5 bytes\n",current_addr);
        return 0;
    }
    msg("call return num addr : %X\n",current_addr);
    PatchByte(current_addr,0xB8);
    PatchDword(current_addr + 1,patch_num);
    // No-op for the usual 5-byte call; pads longer encodings.
    NopCode(current_addr + 5,insn_size - 5);
}

// Maps an operand string "cs:_0" .. "cs:_9" to the digit in its name.
// Any other string matches nothing and the function falls off the end
// without an explicit return value.
static getValue(data)
{
    auto digit;
    for (digit = 0; digit <= 9; digit++)
    {
        if(data == "cs:_" + ltoa(digit, 10))
        {
            return digit;
        }
    }
}

// Replaces the instruction at current_addr (a load of one of the constant
// globals "cs:_N" into a 32-bit register) with `mov reg, imm32`, where imm32
// is the value getValue() reports for `num`. Any bytes of the original
// instruction beyond the 5-byte replacement are NOP-ed.
//   num          : source operand text, e.g. "cs:_3"
//   current_addr : address of the instruction to patch
// Returns 0 without patching when the destination register is not supported
// or the original instruction is too short to hold the replacement.
static PatchNum(num,current_addr)
{
    msg("mov reg num addr : %X\n",current_addr);
    auto value = getValue(num);
    auto reg = print_operand(current_addr,0);
    // Measure the instruction before touching its bytes.
    auto insn_size = get_item_size(current_addr);
    // Opcode of `mov r32, imm32` is 0xB8 + register number; -1 = unsupported.
    auto reg_byte = -1;
    if(reg == "eax")
    {
        reg_byte = 0xB8;
    }
    if(reg == "ecx")
    {
        reg_byte = 0xB9;
    }
    if(reg == "edx")
    {
        reg_byte = 0xBA;
    }
    if(reg == "ebx")
    {
        reg_byte = 0xBB;
    }
    if(reg == "esi")
    {
        reg_byte = 0xBE;
    }
    if(reg == "edi")
    {
        reg_byte = 0xBF;
    }
    if(reg_byte == -1)
    {
        // Previously an unset opcode byte was written here.
        msg("skip %X : unsupported register %s\n",current_addr,reg);
        return 0;
    }
    if(insn_size < 5)
    {
        // The 5-byte replacement would spill into the next instruction.
        msg("skip %X : instruction shorter than 5 bytes\n",current_addr);
        return 0;
    }
    PatchByte(current_addr,reg_byte);
    PatchDword(current_addr + 1,value);
    // Pad the rest of the original instruction (2 bytes for the usual
    // 7-byte form).
    NopCode(current_addr + 5,insn_size - 5);
}

// Entry point of the constant-replacement script for the second sample.
// Visits every instruction head in the hard-coded range
// [0x1400217E8, 0x1400260C2). An instruction whose operand 0 mentions
// "return_" is handed to PatchNumFunc; one whose operand 1 mentions "cs:_"
// is handed to PatchNum.
static main()
{
    auto ea;
    auto limit = 0x1400260C2;
    for (ea = 0x1400217E8; ea != BADADDR && ea < limit; ea = next_head(ea, limit))
    {
        // Calls to constant-returning helpers.
        auto callee = print_operand(ea,0);
        if(strstr(callee,"return_") >= 0)
        {
            PatchNumFunc(callee,ea);
        }

        // Loads of the opaque constant globals.
        auto source = print_operand(ea,1);
        if(strstr(source,"cs:_") >= 0)
        {
            PatchNum(source,ea);
        }
    }
}

效果如下

其中这行算式并没有被ida优化掉,因为使用了浮点数运算。发现每行算式都有固定的1 * (7 - (3 + 3))式子,这个时候用条件解析会去除更加干净

观察发现是将eax放入栈中,匹配混淆段特征并写脚本去除

#include <idc.idc>

// Replaces Length bytes, from Addr upward, with 0x90 (x86 NOP).
static NopCode(Addr, Length)
{
    auto idx;
    idx = 0;
    while (idx < Length)
    {
        PatchByte(Addr + idx, 0x90);
        idx = idx + 1;
    }
}

// Tests whether the instruction at current_addr matches a pattern made of a
// mnemonic and the text of operand 0 and operand 1.
// A pattern field that compares equal to 0 is a wildcard and always matches.
// NOTE(review): string patterns are compared against the integer 0 here; this
// relies on IDC's mixed-type comparison rules — confirm a non-numeric string
// never compares equal to 0.
// Returns 1 on a match, 0 otherwise.
static checkAsm(current_addr,my_insn_name,my_op1,my_op2)
{
    auto mnem = print_insn_mnem(current_addr);
    auto first_op = print_operand(current_addr,0);
    auto second_op = print_operand(current_addr,1);

    // Reject on the first non-wildcard field that differs.
    if(my_insn_name != 0 && mnem != my_insn_name)
    {
        return 0;
    }
    if(my_op1 != 0 && first_op != my_op1)
    {
        return 0;
    }
    if(my_op2 != 0 && second_op != my_op2)
    {
        return 0;
    }
    return 1;
}

// Scans instruction heads from current_addr up to end_addr and returns the
// address of the first `movsd <anything>, xmm0`.
// When nothing matches, the function falls off the end without an explicit
// return value.
static findMovStack(current_addr,end_addr)
{
    auto ea;
    for (ea = current_addr; ea != BADADDR && ea < end_addr; ea = next_head(ea,end_addr))
    {
        if(checkAsm(ea,"movsd",0,"xmm0"))
        {
            return ea;
        }
    }
}

// Scans instruction heads from current_addr up to end_addr and returns the
// address of the first `cvttsd2si eax, xmm0`.
// When nothing matches, the function falls off the end without an explicit
// return value.
static findEnd(current_addr,end_addr)
{
    auto ea;
    for (ea = current_addr; ea != BADADDR && ea < end_addr; ea = next_head(ea,end_addr))
    {
        if(checkAsm(ea,"cvttsd2si","eax","xmm0"))
        {
            return ea;
        }
    }
}

// Entry point of the condition-parsing script for the second sample.
// Walks every instruction head in the hard-coded range
// [0x1400217E8, 0x1400260C2) looking for the start of the floating-point
// obfuscation sequence:
//     sub eax, ? ; pxor xmm0, xmm0 ; cvtsi2sd xmm0, eax ; ...
//     movsd <mem>, xmm0 ; ... ; cvttsd2si eax, xmm0
// When the whole sequence is located, the movsd store is rewritten so that it
// stores the integer register instead, and the rest of the sequence is NOP-ed.
// Nothing is patched unless every address involved was found inside the
// range, so a partial match cannot corrupt unrelated code.
static main()
{
    auto current_addr = 0x1400217E8;
    auto end_addr = 0x1400260C2;
    while (current_addr != BADADDR && current_addr < end_addr)
    {
        auto sub_eax = checkAsm(current_addr,"sub","eax",0);
        auto pxor_addr = next_head(current_addr,end_addr);
        auto pxor_xmm0 = checkAsm(pxor_addr,"pxor","xmm0","xmm0");
        auto cvtsi2sd_addr = next_head(pxor_addr,end_addr);
        auto cvtsi2sd_xmm0 = checkAsm(cvtsi2sd_addr,"cvtsi2sd","xmm0","eax");
        if(sub_eax && pxor_xmm0 && cvtsi2sd_xmm0)
        {
            msg("addr : %X\n",current_addr);
            // Locate every boundary first; patch only when all are valid.
            auto found = 0;
            auto next_addr = BADADDR;
            auto cvttsd2si_addr = BADADDR;
            auto obf_end_addr = BADADDR;
            // The search starts 12 bytes past cvtsi2sd, as in the sample.
            auto mov_stack_addr = findMovStack(cvtsi2sd_addr + 12,end_addr);
            if(mov_stack_addr != BADADDR && mov_stack_addr > cvtsi2sd_addr && mov_stack_addr < end_addr)
            {
                next_addr = next_head(mov_stack_addr,end_addr);
                cvttsd2si_addr = findEnd(mov_stack_addr,end_addr);
                if(cvttsd2si_addr != BADADDR && cvttsd2si_addr > mov_stack_addr && cvttsd2si_addr < end_addr)
                {
                    obf_end_addr = next_head(cvttsd2si_addr,end_addr);
                    if(next_addr != BADADDR && obf_end_addr != BADADDR)
                    {
                        found = 1;
                    }
                }
            }
            if(found)
            {
                // F2 0F 11 /r (movsd m64, xmm0) becomes 90 48 89 /r: the
                // ModRM byte is reused, giving `nop ; mov <mem>, rax`.
                NopCode(mov_stack_addr,1);
                PatchWord(mov_stack_addr + 1,0x8948);
                msg("patch addr : %X\n",mov_stack_addr);
                msg("end addr : %X\n",obf_end_addr);
                // Wipe everything between `sub eax` and the store ...
                NopCode(pxor_addr,mov_stack_addr - pxor_addr);
                // ... and everything after the store, up to and including
                // the cvttsd2si.
                NopCode(next_addr,obf_end_addr - next_addr);
            }
            else
            {
                msg("skip %X : incomplete sequence\n",current_addr);
            }
        }
        current_addr = next_head(current_addr, end_addr);
    }
}

效果如下

这个例子其实不是举得特别好,但是我没找到其他去不掉的文件,条件解析比常量替换要麻烦很多,所以这里只是示例,提供另一种去除思路

总结

虚假控制流主要由恒真恒假跳转控制程序走向,其中夹杂不可达块与无逻辑函数以达成混淆目的。最简单去除方式就是替换不透明谓词为对寄存器赋值,在特定情况下,由于混淆部分代码大部分是相同的,也可以考虑解析特征整块去除。

如果大家有其他思路或者有趣的实例欢迎向我分享!


[培训]《冰与火的战歌:Windows内核攻防实战》!从零到实战,融合AI与Windows内核攻防全技术栈,打造具备自动化能力的内核开发高手。

收藏
免费 3
支持
分享
最新回复 (1)
雪    币: 104
活跃值: (8407)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
2
tql
2天前
0
游客
登录 | 注册 方可回帖
返回