首页
社区
课程
招聘
[原创]Android so hook三部曲(一):dlopen和dlsym的自实现
发表于: 2024-3-29 18:39 7079

[原创]Android so hook三部曲(一):dlopen和dlsym的自实现

2024-3-29 18:39
7079

对so hook有刚需的人应该都知道,hook的第一步就是要拿到被hook方法的地址,而想要获取方法地址就避不开dlopen()和dlsym()这两个好基友函数,但Android为了避免伤及友军便在Android 7.0以上版本对这两个函数的使用做了限制,以至于在获取很多未公开库方法时会获取失败,目的也是为了提高安全性和稳定性。出发点是好的,但我们有刚需,我们就得造它。

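dlopen的目的就是将目标so文件模拟加载到内存中,大致分为以下几个步骤:读取ELF文件头和程序头表;根据PT_LOAD段计算所需内存大小并用mmap申请;将各PT_LOAD段读入内存;解析动态段(字符串表、符号表、依赖库、重定位表、GNU hash表、初始化函数数组);加载依赖库;执行重定位;调用初始化函数。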

dlsym则模拟源码实现方式

// Signature of one module initializer; bufInis is an array of these.
typedef void *(*PFN_INIT)();

// GNU hash lookup data of the loaded module (filled from DT_GNU_HASH).
Elf64_Hash g_hash = {0};
// Dynamic string table of the loaded module (DT_STRTAB).
char *pStrTable = NULL;
// Dynamic symbol table of the loaded module (DT_SYMTAB).
Elf64_Sym *pSymTable = NULL;

// DT_NEEDED entries: string-table offsets first, later rewritten to names.
char *bufNedded[256] = {NULL};
// Number of used entries in bufNedded.
size_t nNumOfNeede = 0;

// Relocation table from DT_RELA and its entry count (from DT_RELASZ).
Elf64_Rela *pRelaDyn = NULL;
size_t nNumOfRela = 0;
// Relocation table from DT_JMPREL and its entry count (from DT_PLTRELSZ).
Elf64_Rela *pRelaPlt = NULL;
size_t nNumOfRelaPlt = 0;

// Initializer array from DT_INIT_ARRAY and its entry count.
PFN_INIT *bufInis = NULL;
size_t nNumOfInis = 0;
 
/*
 * Apply a table of RELA relocations to a module mapped at pBase.
 *
 * pBase      - load base of the module; r_offset, r_addend and st_value
 *              are used as offsets from it.
 * pRel       - relocation entries to process.
 * nNumOfRels - number of entries in pRel.
 * pSym       - dynamic symbol table of the module, indexed by the symbol
 *              number encoded in r_info.
 * hSos       - handles searched with dlsym(), in order, for symbols the
 *              module does not define itself.
 * nNumOfSos  - number of handles in hSos.
 * pStr       - dynamic string table of the module; st_name indexes into it.
 *
 * Handles R_AARCH64_RELATIVE (base + addend) and the symbol based types
 * R_AARCH64_ABS64, R_AARCH64_GLOB_DAT and R_AARCH64_JUMP_SLOT
 * (symbol address + addend). Any other type is ignored. A symbol that no
 * handle resolves is treated as address 0.
 */
__attribute__((noinline))
void Relocate(uint8_t* pBase, Elf64_Rela* pRel,
size_t nNumOfRels,Elf64_Sym* pSym,void*hSos[],
size_t nNumOfSos,
const char* pStr)
{
    for (size_t nRel = 0; nRel < nNumOfRels; nRel++)
    {
        const Elf64_Rela* pEntry = &pRel[nRel];
        uint32_t nSym = ELF64_R_SYM(pEntry->r_info);    // symbol table index
        uint32_t nType = ELF64_R_TYPE(pEntry->r_info);  // relocation type
        uint64_t* pSlot = (uint64_t*)(pBase + pEntry->r_offset);

        if (nType == R_AARCH64_RELATIVE)
        {
            // No symbol involved, so skip the lookup entirely.
            *pSlot = (uint64_t)(pBase + pEntry->r_addend);
            continue;
        }

        if (nType != R_AARCH64_ABS64 &&
            nType != R_AARCH64_GLOB_DAT &&
            nType != R_AARCH64_JUMP_SLOT)
        {
            // Unsupported type: nothing is written, so nothing to resolve.
            continue;
        }

        // Resolve the symbol the relocation refers to.
        void* pTarget = NULL;
        if (pSym[nSym].st_value != 0)
        {
            // Symbol defined inside this module.
            pTarget = pBase + pSym[nSym].st_value;
        }
        else
        {
            // Imported symbol: the first dependency that exports it wins.
            const char* szSymName = pStr + pSym[nSym].st_name;
            for (size_t nSo = 0; nSo < nNumOfSos; nSo++)
            {
                pTarget = dlsym(hSos[nSo], szSymName);
                if (pTarget != NULL)
                {
                    break;
                }
            }
        }

        // All three symbol based types store S + A.
        *pSlot = (uint64_t)pTarget + (uint64_t)pEntry->r_addend;
    }
}
 
__attribute__((noinline))
void* MyDlSym(uint8_t* pBase, const char* szName)
{
    //判断要查找的字符是否存在于此so内
    uint32_t hash =  gnu_hash(szName);    //计算hash
    uint32_t h2 = hash>>g_hash.shift2;
 
    uint32_t bloom_mask_bits = 64;
    uint32_t word_num = (hash / bloom_mask_bits)& g_hash.maskswords;
    uint64_t bloom_word = g_hash.gnu_bloom_filter_[word_num];
 
    if( (1&(bloom_word>>(hash%bloom_mask_bits)) & (bloom_word>>(h2%bloom_mask_bits))) == 0 )
    {
        //不在模块内
        return NULL;
    }
 
    uint32_t n = g_hash.gnu_bucket_[hash % g_hash.nbucket];
 
    do
    {
        Elf64_Sym* s = pSymTable+n;
        if( ((g_hash.gnu_chain_[n] ^ hash) >> 1) == 0 &&
            strcmp(pStrTable+s->st_name, szName)==0 )
            {
                //找到了,返回地址
                return pBase+s->st_value;
            }
 
    } while ((g_hash.gnu_chain_[n++]&1) == 0);
}
 
void* MyDlopen(const char* szName)
{
    //读取文件、文件头和段表
    FILE* file = fopen(szName,"rb");
    if(file == NULL)
    {
        printf("open file error!\n");
        return 0;
    }
 
    Elf64_Ehdr hdr = {0};               //elf hander
    if(fread(&hdr, 1, sizeof(hdr), file) != sizeof(hdr))
    {
        printf("read Ehdr error!\n");
        return 0;
    }
 
    size_t nSizeOfPhdrs = hdr.e_phentsize*hdr.e_phnum;   //段表大小
    Elf64_Phdr* phdrs = (Elf64_Phdr*)malloc(nSizeOfPhdrs);
    if( fread(phdrs, 1, nSizeOfPhdrs,file) != nSizeOfPhdrs)
    {
        printf("read phdr error!\n");
        return 0;
    }
     
    //fclose(file);
 
    //申请内存、映射 
    size_t nLoadSize = 0;
    for(int i=hdr.e_phnum-1;i>=0;i--)
    {
        if(phdrs[i].p_type == PT_LOAD)
        {
            nLoadSize = ((phdrs[i].p_vaddr+phdrs[i].p_memsz+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE;    //需要申请的内存大小
            break;
        }
    }
 
    uint8_t* pBase = mmap64(NULL, nLoadSize, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  //申请空间
    if(pBase == MAP_FAILED)
    {
        printf("mmap64 error: %s\n",strerror(errno));
        return 0;
    }
 
 
    for(size_t i=0;i<hdr.e_phnum;i++)
    {
        if(phdrs[i].p_type == PT_LOAD)
        {
            fseek(file, phdrs[i].p_offset,SEEK_SET);
            fread(pBase+phdrs[i].p_vaddr, 1 , phdrs[i].p_filesz,file);    //读取文件到内存
        }
    }
    free(phdrs);
 
    // 定位动态段
    phdrs = (Elf64_Phdr*)(pBase+hdr.e_phoff);
    int nDyncIdx = 0;    //动态段索引
    Elf64_Dyn* pDyns = NULL;
    size_t nNumofDyns = 0;     //动态段元素个数
     
    for(size_t i=0;i<hdr.e_phnum;i++)
    {
        if(phdrs[i].p_type == PT_DYNAMIC)
        {
            nDyncIdx = i;
            pDyns = (Elf64_Dyn*)(pBase+phdrs[i].p_vaddr);
            nNumofDyns = phdrs[i].p_filesz/sizeof(Elf64_Dyn);
            break;
        }
    }
 
    //解析动态段
    while(pDyns->d_tag!= DT_NULL)
    {
        switch(pDyns->d_tag)
        {
        case DT_STRTAB:    //字符串段
            pStrTable = (char*)(pBase+pDyns->d_un.d_ptr);   //模块基址+字符串表偏移
            break;
 
        case DT_SYMTAB:    //符号表
            pSymTable = (Elf64_Sym*)(pBase+pDyns->d_un.d_ptr);
            break;
 
        case DT_NEEDED:    //elf文件依赖项
            bufNedded[nNumOfNeede++] = pDyns->d_un.d_ptr;     //str表还没解析,先拿偏移
            break;
 
        case DT_RELA:      //重定位表
            pRelaDyn = (Elf64_Rela*)(pBase+pDyns->d_un.d_ptr);
            break;
        case DT_RELASZ:
            nNumOfRela = pDyns->d_un.d_val/sizeof(Elf64_Rela);
            break;
        case DT_JMPREL:    //plt表
            pRelaPlt = (Elf64_Rela*)(pBase+pDyns->d_un.d_ptr);
            break;
        case DT_PLTRELSZ:
            nNumOfRelaPlt = pDyns->d_un.d_val/sizeof(Elf64_Rela);
            break;
 
        case DT_GNU_HASH:  //hash
        {
            uint8_t* pHashTable = pBase+pDyns->d_un.d_ptr;
            g_hash.nbucket = ((uint32_t*)pHashTable)[0];
            g_hash.symindex = ((uint32_t*)pHashTable)[1];
            g_hash.maskswords = ((uint32_t*)pHashTable)[2];
            g_hash.shift2 = ((uint32_t*)pHashTable)[3];
            g_hash.gnu_bloom_filter_ = (uint64_t*)(pHashTable+16);
            g_hash.gnu_bucket_ = (uint32_t*)(g_hash.gnu_bloom_filter_+g_hash.maskswords);
            g_hash.gnu_chain_ = g_hash.gnu_bucket_+g_hash.nbucket-g_hash .symindex;
 
            --g_hash.maskswords;
            break;
        }
 
        case DT_INIT_ARRAY:    //初始化
            bufInis = (PFN_INIT*)(pBase+pDyns->d_un.d_ptr);
            break;
        case DT_INIT_ARRAYSZ:
            nNumOfInis = pDyns->d_un.d_val/sizeof(void*);
            break;
 
        default:
            break;
        }
 
        pDyns++;
    }
 
    //加载模块,此so需要依赖的elf,方便查找导入符号
    void** hSos = malloc(sizeof(void*)*nNumOfNeede);
    for(size_t i=0;i<nNumOfNeede;i++)
    {
        bufNedded[i] = (uint64_t)bufNedded[i]+pStrTable;      //字符串表已加载,偏移加字符串表基址为目标字符串地址
        hSos[i] = dlopen(bufNedded[i], RTLD_NOW);
    }
 
 
    //重定位
    Relocate(pBase, pRelaDyn, nNumOfRela,pSymTable,hSos,nNumOfNeede,pStrTable);
    Relocate(pBase, pRelaPlt,nNumOfRelaPlt,pSymTable,hSos,nNumOfNeede,pStrTable);
 
 
    //调用初始化函数
    for(size_t i = 0;i<nNumOfInis;i++)
    {
        bufInis[i]();
    }
 
    return pBase;
}
// Signature of one module initializer; bufInis is an array of these.
typedef void *(*PFN_INIT)();

// GNU hash lookup data of the loaded module (filled from DT_GNU_HASH).
Elf64_Hash g_hash = {0};
// Dynamic string table of the loaded module (DT_STRTAB).
char *pStrTable = NULL;
// Dynamic symbol table of the loaded module (DT_SYMTAB).
Elf64_Sym *pSymTable = NULL;

// DT_NEEDED entries: string-table offsets first, later rewritten to names.
char *bufNedded[256] = {NULL};
// Number of used entries in bufNedded.
size_t nNumOfNeede = 0;

// Relocation table from DT_RELA and its entry count (from DT_RELASZ).
Elf64_Rela *pRelaDyn = NULL;
size_t nNumOfRela = 0;
// Relocation table from DT_JMPREL and its entry count (from DT_PLTRELSZ).
Elf64_Rela *pRelaPlt = NULL;
size_t nNumOfRelaPlt = 0;

// Initializer array from DT_INIT_ARRAY and its entry count.
PFN_INIT *bufInis = NULL;
size_t nNumOfInis = 0;
 
/*
 * Apply a table of RELA relocations to a module mapped at pBase.
 *
 * pBase      - load base of the module; r_offset, r_addend and st_value
 *              are used as offsets from it.
 * pRel       - relocation entries to process.
 * nNumOfRels - number of entries in pRel.
 * pSym       - dynamic symbol table of the module, indexed by the symbol
 *              number encoded in r_info.
 * hSos       - handles searched with dlsym(), in order, for symbols the
 *              module does not define itself.
 * nNumOfSos  - number of handles in hSos.
 * pStr       - dynamic string table of the module; st_name indexes into it.
 *
 * Handles R_AARCH64_RELATIVE (base + addend) and the symbol based types
 * R_AARCH64_ABS64, R_AARCH64_GLOB_DAT and R_AARCH64_JUMP_SLOT
 * (symbol address + addend). Any other type is ignored. A symbol that no
 * handle resolves is treated as address 0.
 */
__attribute__((noinline))
void Relocate(uint8_t* pBase, Elf64_Rela* pRel,
size_t nNumOfRels,Elf64_Sym* pSym,void*hSos[],
size_t nNumOfSos,
const char* pStr)
{
    for (size_t nRel = 0; nRel < nNumOfRels; nRel++)
    {
        const Elf64_Rela* pEntry = &pRel[nRel];
        uint32_t nSym = ELF64_R_SYM(pEntry->r_info);    // symbol table index
        uint32_t nType = ELF64_R_TYPE(pEntry->r_info);  // relocation type
        uint64_t* pSlot = (uint64_t*)(pBase + pEntry->r_offset);

        if (nType == R_AARCH64_RELATIVE)
        {
            // No symbol involved, so skip the lookup entirely.
            *pSlot = (uint64_t)(pBase + pEntry->r_addend);
            continue;
        }

        if (nType != R_AARCH64_ABS64 &&
            nType != R_AARCH64_GLOB_DAT &&
            nType != R_AARCH64_JUMP_SLOT)
        {
            // Unsupported type: nothing is written, so nothing to resolve.
            continue;
        }

        // Resolve the symbol the relocation refers to.
        void* pTarget = NULL;
        if (pSym[nSym].st_value != 0)
        {
            // Symbol defined inside this module.
            pTarget = pBase + pSym[nSym].st_value;
        }
        else
        {
            // Imported symbol: the first dependency that exports it wins.
            const char* szSymName = pStr + pSym[nSym].st_name;
            for (size_t nSo = 0; nSo < nNumOfSos; nSo++)
            {
                pTarget = dlsym(hSos[nSo], szSymName);
                if (pTarget != NULL)
                {
                    break;
                }
            }
        }

        // All three symbol based types store S + A.
        *pSlot = (uint64_t)pTarget + (uint64_t)pEntry->r_addend;
    }
}
 
__attribute__((noinline))
void* MyDlSym(uint8_t* pBase, const char* szName)
{
    //判断要查找的字符是否存在于此so内
    uint32_t hash =  gnu_hash(szName);    //计算hash
    uint32_t h2 = hash>>g_hash.shift2;
 
    uint32_t bloom_mask_bits = 64;
    uint32_t word_num = (hash / bloom_mask_bits)& g_hash.maskswords;
    uint64_t bloom_word = g_hash.gnu_bloom_filter_[word_num];
 
    if( (1&(bloom_word>>(hash%bloom_mask_bits)) & (bloom_word>>(h2%bloom_mask_bits))) == 0 )
    {
        //不在模块内
        return NULL;
    }
 
    uint32_t n = g_hash.gnu_bucket_[hash % g_hash.nbucket];
 
    do
    {
        Elf64_Sym* s = pSymTable+n;
        if( ((g_hash.gnu_chain_[n] ^ hash) >> 1) == 0 &&
            strcmp(pStrTable+s->st_name, szName)==0 )
            {
                //找到了,返回地址
                return pBase+s->st_value;
            }
 
    } while ((g_hash.gnu_chain_[n++]&1) == 0);
}
 
void* MyDlopen(const char* szName)
{
    //读取文件、文件头和段表
    FILE* file = fopen(szName,"rb");
    if(file == NULL)
    {
        printf("open file error!\n");
        return 0;
    }
 
    Elf64_Ehdr hdr = {0};               //elf hander
    if(fread(&hdr, 1, sizeof(hdr), file) != sizeof(hdr))
    {
        printf("read Ehdr error!\n");
        return 0;
    }
 
    size_t nSizeOfPhdrs = hdr.e_phentsize*hdr.e_phnum;   //段表大小
    Elf64_Phdr* phdrs = (Elf64_Phdr*)malloc(nSizeOfPhdrs);
    if( fread(phdrs, 1, nSizeOfPhdrs,file) != nSizeOfPhdrs)
    {
        printf("read phdr error!\n");
        return 0;
    }
     
    //fclose(file);
 
    //申请内存、映射 
    size_t nLoadSize = 0;
    for(int i=hdr.e_phnum-1;i>=0;i--)
    {
        if(phdrs[i].p_type == PT_LOAD)
        {
            nLoadSize = ((phdrs[i].p_vaddr+phdrs[i].p_memsz+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE;    //需要申请的内存大小
            break;
        }
    }
 
    uint8_t* pBase = mmap64(NULL, nLoadSize, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  //申请空间
    if(pBase == MAP_FAILED)
    {
        printf("mmap64 error: %s\n",strerror(errno));
        return 0;
    }
 
 
    for(size_t i=0;i<hdr.e_phnum;i++)
    {
        if(phdrs[i].p_type == PT_LOAD)
        {
            fseek(file, phdrs[i].p_offset,SEEK_SET);
            fread(pBase+phdrs[i].p_vaddr, 1 , phdrs[i].p_filesz,file);    //读取文件到内存
        }
    }
    free(phdrs);
 
    // 定位动态段
    phdrs = (Elf64_Phdr*)(pBase+hdr.e_phoff);

[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课

最后于 2024-4-8 15:42 被Tom-gogo编辑 ,原因:
收藏
免费 6
支持
分享
最新回复 (15)
雪    币: 4724
活跃值: (6994)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
2
可以哦,虽然有点基础
2024-3-29 21:37
0
雪    币: 501
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
3
huangjw 可以哦,虽然有点基础
初学者,而且是第一次发帖,见谅
2024-3-29 22:33
0
雪    币: 251
活跃值: (579)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
4
谢谢,学习了
2024-4-4 09:00
0
雪    币: 102
活跃值: (2280)
能力值: ( LV4,RANK:50 )
在线值:
发帖
回帖
粉丝
5
mark,有完整的内存加载模块的源码吗?
2024-4-6 18:31
0
雪    币: 4183
活跃值: (31196)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
6
感谢分享
2024-4-7 09:23
1
雪    币: 24
活跃值: (1579)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
7
 麻烦 标题能不能修正下 dlopen 不是 dlopne,看到时我一直在想open这个单词该怎么拼
2024-4-7 09:59
0
雪    币: 501
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
8
dreameriii [em_63] 麻烦 标题能不能修正下 dlopen 不是 dlopne,看到时我一直在想open这个单词该怎么拼[em_78]
感谢指正
2024-4-8 15:43
0
雪    币: 801
活跃值: (1053)
能力值: ( LV3,RANK:30 )
在线值:
发帖
回帖
粉丝
9
期待剩下两部
2024-4-8 18:20
0
雪    币: 501
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
10
time.time 期待剩下两部[em_86]
最近在找工作,等稳定下来就更新
2024-4-8 20:31
0
雪    币: 600
活跃值: (1047)
能力值: ( LV3,RANK:20 )
在线值:
发帖
回帖
粉丝
11
这个代码看着好熟悉,不会是老王的吧
2024-4-13 11:54
0
雪    币: 501
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
12
mb_kbkqyusp 这个代码看着好熟悉,不会是老王的吧[em_13]

kr?

最后于 2024-4-13 22:52 被Tom-gogo编辑 ,原因:
2024-4-13 22:41
0
雪    币: 1440
活跃值: (1401)
能力值: ( LV3,RANK:23 )
在线值:
发帖
回帖
粉丝
13

感谢分享,刚好正在学ELF文件格式

最后于 2024-4-24 10:45 被Black貓①呺编辑 ,原因:
2024-4-24 10:38
0
雪    币: 10
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
14
长知识了,感谢分享,期待接下来的文章。
2024-12-9 17:06
0
雪    币: 17
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
15
老铁,赶紧更新
2024-12-17 18:00
0
雪    币: 10
能力值: ( LV1,RANK:0 )
在线值:
发帖
回帖
粉丝
16
期待大佬更新
2025-1-7 12:38
0
游客
登录 | 注册 方可回帖
返回