-
-
[原创]菜鸟读capstone与keystone源码入门
-
发表于: 2020-3-29 17:08 8352
-
菜鸟最近想入门 OLLVM 分析,得先找个反汇编器,看了斑竹大佬的
《各种开源汇编、反汇编引擎的非专业比较》(https://bbs.pediy.com/thread-205590.htm),决定入门 capstone。
搜了搜网上,可能这玩意太简单,大佬都是直接用,只有c的入门,没啥python的入门帖子,菜鸟对各种函数参数还是没搞太明白,只好自己看下源码,记录一下用法。。。
一、capstone 反汇编引擎:最重要的功能是把二进制机器码转化为汇编语言,关键代码在 capstone 包里的 __init__.py。
最重要的2个类Cs和CsInsn
Cs类的disasm是最重要的反汇编函数,我得先搞明白它的参数和返回
它调用了封装的 C 函数 cs_disasm:先通过 cs_disasm 解析 code,获得一个 all_insn 引用,然后通过 yield 逐个输出 CsInsn 的实例。
看一下 CsInsn 类的 __init__,可以看到:all_insn 是由 ctypes.POINTER(_cs_insn) 创建的、指向 C 结构体 _cs_insn 的指针,其中的每一项 all_insn[i] 作为 all_info 参数传入,经 copy_ctypes 拷贝后保存到 CsInsn 的 _raw 字段。
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
# Disassemble binary & return disassembled instructions in CsInsn objects
def disasm(self, code, offset, count=0):
    """Disassemble ``code`` and lazily yield one ``CsInsn`` per decoded entry.

    Args:
        code: Machine code to decode. A ``bytearray`` is converted to
            ``bytes`` before being handed to the C library.
        offset: Forwarded to ``cs_disasm`` as its address argument
            (presumably the address of the first instruction).
        count: Forwarded to ``cs_disasm`` unchanged; defaults to 0
            (presumably "no limit" - confirm against the C API).

    Yields:
        ``CsInsn`` objects, one for each entry ``cs_disasm`` reported.

    Raises:
        CsError: When nothing was decoded and ``cs_errno`` returns a status
            other than ``CS_ERR_OK``.
    """
    # _cs_insn is the ctypes mirror of the C instruction struct; this creates
    # a pointer of that type for cs_disasm to fill in.
    insn_array = ctypes.POINTER(_cs_insn)()

    # Hack, unicorn's memory accessors give you back bytearrays, but they
    # cause TypeErrors when you hand them into Capstone.
    if isinstance(code, bytearray):
        code = bytes(code)

    # cs_disasm receives insn_array by reference and returns how many
    # entries it produced.
    decoded = _cs.cs_disasm(self.csh, code, len(code), offset, count,
                            ctypes.byref(insn_array))

    if decoded <= 0:
        status = _cs.cs_errno(self.csh)
        if status != CS_ERR_OK:
            raise CsError(status)
        return

    try:
        for index in range(decoded):
            # CsInsn keeps the entry it is given in its _raw field, which is
            # where address, mnemonic, op_str etc. are read from.
            yield CsInsn(self, insn_array[index])
    finally:
        # Release the C-side array once iteration ends (or is abandoned).
        _cs.cs_free(insn_array, decoded)
def __init__(self, cs, all_info):
    """Wrap one instruction record produced by the disassembler.

    Args:
        cs: The owning engine object; its ``_detail`` flag decides whether
            the detail record is duplicated too.
        all_info: The ctypes instruction record to wrap.
    """
    # copy_ctypes is defined elsewhere in the module; presumably it returns
    # an independent copy of the ctypes structure.
    self._raw = copy_ctypes(all_info)
    self._cs = cs

    if not (self._cs._detail and self._raw.id != 0):
        return

    # save detail: allocate a fresh detail struct of the pointed-to type and
    # byte-copy the original into it, so _raw.detail no longer aliases the
    # caller's record.
    detail_ptr = all_info.detail
    self._raw.detail = ctypes.pointer(detail_ptr._type_())
    source_detail = detail_ptr[0]
    ctypes.memmove(
        ctypes.byref(self._raw.detail[0]),
        ctypes.byref(source_detail),
        ctypes.sizeof(type(source_detail)),
    )
class _cs_detail(ctypes.Structure):
    """ctypes layout of the per-instruction detail record.

    Defined before ``_cs_insn`` because that structure's ``detail`` field
    refers to this class by name when its ``_fields_`` tuple is evaluated.
    """

    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 12),    # array of 12 uint16 entries
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),   # array of 20 uint16 entries
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),         # array of 8 ubyte entries
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),                     # _cs_arch is defined elsewhere in the module
    )


class _cs_insn(ctypes.Structure):
    """ctypes layout of one instruction record (``_cs_insn``)."""

    _fields_ = (
        ('id', ctypes.c_uint),
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),
        ('bytes', ctypes.c_ubyte * 16),         # 16-byte buffer
        ('mnemonic', ctypes.c_char * 32),       # 32-char buffer
        ('op_str', ctypes.c_char * 160),        # 160-char buffer
        ('detail', ctypes.POINTER(_cs_detail)),
    )
def address(self):
    """Return the ``address`` field of the wrapped raw instruction record."""
    raw_insn = self._raw
    return raw_insn.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
subs r0, r1, r0 的groups是[150, 151],分别代表[thumb, thumb1only]
# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0 # uninitialized/invalid group.
CS_GRP_JUMP = 1 # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2 # all call instructions
CS_GRP_RET = 3 # all return instructions
CS_GRP_INT = 4 # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5 # all interrupt return instructions
CS_GRP_PRIVILEGE = 6 # all privileged instructions
# Print only instructions that belong to group 1 but not to group 2
# (in the common group table 1 is CS_GRP_JUMP and 2 is CS_GRP_CALL),
# i.e. jumps that are not calls. `i` is the current instruction of the
# surrounding disassembly loop.
is_jump = 1 in i.groups
if is_jump and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Common instruction operand types - to be consistent across all architectures.
# One assignment per line: when the whole listing sits on a single line that
# starts with '#', every assignment is part of the comment and nothing is bound.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
# Demo: disassemble one byte range of an Android ARM shared object in Thumb
# mode and print, per instruction, its text plus the registers it reads/writes.
import capstone

# Build the input: read the whole library and pick the [start, end) window.
# The file is read inside a `with` block so the handle is closed, and the
# buffer is not called `bin`, which would shadow the builtin.
with open('/src/main/lib/armeabi/libshell-super.2019.so', 'rb') as lib_file:
    image = lib_file.read()
start = 0x0000307C
end = 0x00004df4

# Create the Cs instance; Android libraries are generally ARM.
cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
cs.detail = True  # detail features are only available once detail is switched on

for i in cs.disasm(image[start:end], start):
    # Print address, mnemonic and operand string.
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))

    # Print groups (disabled; uncomment to enable).
    # print("\t%s\n" %( i.groups))
    # for a in i.groups:
    #     print(i.group_name(a))

    # Print regs_read and regs_write, first as raw ids, then by name.
    print("0x%x:\t%s\t%s\n" % (i.address, i.regs_read, i.regs_write))
    for a in i.regs_read:
        print("regs_read:" + i.reg_name(a))
    for a in i.regs_write:
        print("regs_write:" + i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False):
    """Assemble ``string`` through ``ks_asm`` and return the encoding.

    Args:
        string: Assembly source. A ``str`` is encoded as ASCII first;
            ``bytes`` is passed through untouched.
        addr: Forwarded to ``ks_asm`` as its address argument; defaults to 0.
        as_bytes: When true the encoding is returned as ``bytes``, otherwise
            as a list of integers.

    Returns:
        ``(encoding, statement_count)``, or ``(None, 0)`` when ``ks_asm``
        reports zero statements.

    Raises:
        KsError: When ``ks_asm`` returns a non-zero status; built from
            ``ks_errno`` and the statement count reported so far.
    """
    out_buf = POINTER(c_ubyte)()
    out_len = c_size_t()
    n_statements = c_size_t()

    if isinstance(string, str) and not isinstance(string, bytes):
        string = string.encode('ascii')

    # ks_asm fills out_buf, out_len and n_statements through the references.
    status = _ks.ks_asm(self._ksh, string, addr,
                        byref(out_buf), byref(out_len), byref(n_statements))
    if status != 0:
        raise KsError(_ks.ks_errno(self._ksh), n_statements.value)

    if n_statements.value == 0:
        return (None, 0)

    # Copy the encoded bytes into a Python object before freeing the C buffer.
    if as_bytes:
        encoding = string_at(out_buf, out_len.value)
    else:
        encoding = [out_buf[pos] for pos in range(out_len.value)]

    _ks.ks_free(out_buf)
    return (encoding, n_statements.value)
def __init__(self, arch, mode):
    """Open a keystone engine handle for ``arch`` / ``mode``.

    Args:
        arch: Architecture constant forwarded to ``ks_open``.
        mode: Mode constant forwarded to ``ks_open``.

    Raises:
        KsError: With ``KS_ERR_VERSION`` when the core's major/minor version
            differs from ``KS_API_MAJOR`` / ``KS_API_MINOR``, or with the
            status returned by ``ks_open`` when that is not ``KS_ERR_OK``.
    """
    # verify version compatibility with the core before doing anything
    core_major, core_minor, _ = ks_version()
    if (core_major, core_minor) != (KS_API_MAJOR, KS_API_MINOR):
        self._ksh = None
        # our binding version is different from the core's API version
        raise KsError(KS_ERR_VERSION)

    self._arch = arch
    self._mode = mode

    # ks_open writes the engine handle into _ksh through the reference.
    self._ksh = c_void_p()
    status = _ks.ks_open(arch, mode, byref(self._ksh))
    if status != KS_ERR_OK:
        self._ksh = None
        raise KsError(status)

    # Intel syntax is default for X86
    self._syntax = KS_OPT_SYNTAX_INTEL if arch == KS_ARCH_X86 else None
# setup all the function prototype
def _setup_prototype(lib, fname, restype, *argtypes):
    """Set ``restype`` and ``argtypes`` on the attribute ``fname`` of ``lib``.

    Args:
        lib: Object exposing the function as an attribute (here the loaded
            keystone library ``_ks``).
        fname: Name of the function to configure.
        restype: Value assigned to the function's ``restype``.
        *argtypes: Values assigned, as a tuple, to the function's ``argtypes``.
    """
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes


# ctypes aliases used in the prototypes below.
kserr = c_int
ks_engine = c_void_p
ks_hook_h = c_size_t

_setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int))
_setup_prototype(_ks, "ks_arch_supported", c_bool, c_int)
_setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine))
_setup_prototype(_ks, "ks_close", kserr, ks_engine)
_setup_prototype(_ks, "ks_strerror", c_char_p, kserr)
_setup_prototype(_ks, "ks_errno", kserr, ks_engine)
_setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p)
_setup_prototype(
    _ks, "ks_asm", c_int,
    ks_engine, c_char_p, c_uint64,
    POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t),
)
_setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
# Demo: assemble one ARM instruction with keystone and print its bytes in hex.
# Only the names actually used are imported, instead of `from keystone import *`.
from keystone import KS_ARCH_ARM, KS_MODE_ARM, Ks

ks = Ks(KS_ARCH_ARM, KS_MODE_ARM)
code = b"sub r1, r2, r5"

# asm() returns the encoding (a list of ints by default) and the number of
# statements that were assembled.
encoding, count = ks.asm(code)

# Same output as printing piece by piece: "<code> = [ xx xx ... ]".
hex_bytes = "".join("%02x " % byte for byte in encoding)
print("%s = [ %s]" % (code, hex_bytes))
class _cs_insn(ctypes.Structure): _fields_ = ( ('id', ctypes.c_uint), ('address', ctypes.c_uint64), ('size', ctypes.c_uint16), ('bytes', ctypes.c_ubyte * 16), ('mnemonic', ctypes.c_char * 32), ('op_str', ctypes.c_char * 160), ('detail', ctypes.POINTER(_cs_detail)), ) class _cs_detail(ctypes.Structure): _fields_ = ( ('regs_read', ctypes.c_uint16 * 12), ('regs_read_count', ctypes.c_ubyte), ('regs_write', ctypes.c_uint16 * 20), ('regs_write_count', ctypes.c_ubyte), ('groups', ctypes.c_ubyte * 8), ('groups_count', ctypes.c_ubyte), ('arch', _cs_arch), )
def address(self): return self._raw.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
subs r0, r1, r0 的groups是[150, 151],分别代表[ thumb , thumb1only ]# Common instruction groups - to be consistent across all architectures. CS_GRP_INVALID = 0 # uninitialized/invalid group. CS_GRP_JUMP = 1 # all jump instructions (conditional+direct+indirect jumps) CS_GRP_CALL = 2 # all call instructions CS_GRP_RET = 3 # all return instructions CS_GRP_INT = 4 # all interrupt instructions (int+syscall) CS_GRP_IRET = 5 # all interrupt return instructions CS_GRP_PRIVILEGE = 6 # all privileged instructions
if (1 in i.groups and 2 not in i.groups): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) print("\t%s\n" %( i.groups))
# Common instruction operand types - to be consistent across all architectures. CS_OP_INVALID = 0 CS_OP_REG = 1 CS_OP_IMM = 2 CS_OP_MEM = 3 CS_OP_FP = 4
#创建输入 bin= open('/src/main/lib/armeabi/libshell-super.2019.so','rb').read() start=0x0000307C end=0x00004df4 #导入capstone新建Cs实例,安卓一般都是arm的 import capstone cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB) cs.detail = True#打开了detail 才有detail的功能 #打印地址,操作码,操作数 for i in cs.disasm(bin[start:end],start): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) #打印groups # print("\t%s\n" %( i.groups)) # for a in i.groups: # print(i.group_name(a)) #打印regs_read与regs_write print("0x%x:\t%s\t%s\n" %(i.address, i.regs_read, i.regs_write)) for a in i.regs_read: print("regs_read:"+i.reg_name(a)) for a in i.regs_write: print("regs_write:"+i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False): encode = POINTER(c_ubyte)() encode_size = c_size_t() stat_count = c_size_t() if not isinstance(string, bytes) and isinstance(string, str): string = string.encode('ascii') status = _ks.ks_asm(self._ksh, string, addr, byref(encode), byref(encode_size), byref(stat_count))#这里调用c函数ks_asm获得encode,encode_size的引用 if (status != 0): errno = _ks.ks_errno(self._ksh) raise KsError(errno, stat_count.value) else: if stat_count.value == 0: return (None, 0) else: if as_bytes: encoding = string_at(encode, encode_size.value) else: encoding = [] for i in range(encode_size.value): encoding.append(encode[i]) #把所有encode附加到encoding作为返回值 _ks.ks_free(encode) return (encoding, stat_count.value)
def __init__(self, arch, mode): # verify version compatibility with the core before doing anything (major, minor, _combined) = ks_version() if major != KS_API_MAJOR or minor != KS_API_MINOR:#先判断下当前版本和核心api是否匹配 self._ksh = None # our binding version is different from the core's API version raise KsError(KS_ERR_VERSION) self._arch, self._mode = arch, mode self._ksh = c_void_p() status = _ks.ks_open(arch, mode, byref(self._ksh))#根据参数arch和mode通过c函数ks_open打开ks_engine,获得_ksh的引用 if status != KS_ERR_OK: self._ksh = None raise KsError(status) if arch == KS_ARCH_X86: # Intel syntax is default for X86 self._syntax = KS_OPT_SYNTAX_INTEL else: self._syntax = None
# setup all the function prototype def _setup_prototype(lib, fname, restype, *argtypes): getattr(lib, fname).restype = restype getattr(lib, fname).argtypes = argtypes kserr = c_int ks_engine = c_void_p ks_hook_h = c_size_t _setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int)) _setup_prototype(_ks, "ks_arch_supported", c_bool, c_int) _setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine)) _setup_prototype(_ks, "ks_close", kserr, ks_engine) _setup_prototype(_ks, "ks_strerror", c_char_p, kserr) _setup_prototype(_ks, "ks_errno", kserr, ks_engine) _setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p) _setup_prototype(_ks, "ks_asm", c_int, ks_engine, c_char_p, c_uint64, POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t)) _setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
from keystone import * ks = Ks(KS_ARCH_ARM, KS_MODE_ARM) code=b"sub r1, r2, r5" encoding, count =ks.asm(code) print("%s = [ " % code, end='') for i in encoding: print("%02x " % i, end='') print("]")
def address(self): return self._raw.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
subs r0, r1, r0 的groups是[150, 151],分别代表[ thumb , thumb1only ]# Common instruction groups - to be consistent across all architectures. CS_GRP_INVALID = 0 # uninitialized/invalid group. CS_GRP_JUMP = 1 # all jump instructions (conditional+direct+indirect jumps) CS_GRP_CALL = 2 # all call instructions CS_GRP_RET = 3 # all return instructions CS_GRP_INT = 4 # all interrupt instructions (int+syscall) CS_GRP_IRET = 5 # all interrupt return instructions CS_GRP_PRIVILEGE = 6 # all privileged instructions
if (1 in i.groups and 2 not in i.groups): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) print("\t%s\n" %( i.groups))
# Common instruction operand types - to be consistent across all architectures. CS_OP_INVALID = 0 CS_OP_REG = 1 CS_OP_IMM = 2 CS_OP_MEM = 3 CS_OP_FP = 4
#创建输入 bin= open('/src/main/lib/armeabi/libshell-super.2019.so','rb').read() start=0x0000307C end=0x00004df4 #导入capstone新建Cs实例,安卓一般都是arm的 import capstone cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB) cs.detail = True#打开了detail 才有detail的功能 #打印地址,操作码,操作数 for i in cs.disasm(bin[start:end],start): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) #打印groups # print("\t%s\n" %( i.groups)) # for a in i.groups: # print(i.group_name(a)) #打印regs_read与regs_write print("0x%x:\t%s\t%s\n" %(i.address, i.regs_read, i.regs_write)) for a in i.regs_read: print("regs_read:"+i.reg_name(a)) for a in i.regs_write: print("regs_write:"+i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False): encode = POINTER(c_ubyte)() encode_size = c_size_t() stat_count = c_size_t() if not isinstance(string, bytes) and isinstance(string, str): string = string.encode('ascii') status = _ks.ks_asm(self._ksh, string, addr, byref(encode), byref(encode_size), byref(stat_count))#这里调用c函数ks_asm获得encode,encode_size的引用 if (status != 0): errno = _ks.ks_errno(self._ksh) raise KsError(errno, stat_count.value) else: if stat_count.value == 0: return (None, 0) else: if as_bytes: encoding = string_at(encode, encode_size.value) else: encoding = [] for i in range(encode_size.value): encoding.append(encode[i]) #把所有encode附加到encoding作为返回值 _ks.ks_free(encode) return (encoding, stat_count.value)
def __init__(self, arch, mode): # verify version compatibility with the core before doing anything (major, minor, _combined) = ks_version() if major != KS_API_MAJOR or minor != KS_API_MINOR:#先判断下当前版本和核心api是否匹配 self._ksh = None # our binding version is different from the core's API version raise KsError(KS_ERR_VERSION) self._arch, self._mode = arch, mode self._ksh = c_void_p() status = _ks.ks_open(arch, mode, byref(self._ksh))#根据参数arch和mode通过c函数ks_open打开ks_engine,获得_ksh的引用 if status != KS_ERR_OK: self._ksh = None raise KsError(status) if arch == KS_ARCH_X86: # Intel syntax is default for X86 self._syntax = KS_OPT_SYNTAX_INTEL else: self._syntax = None
# setup all the function prototype def _setup_prototype(lib, fname, restype, *argtypes): getattr(lib, fname).restype = restype getattr(lib, fname).argtypes = argtypes kserr = c_int ks_engine = c_void_p ks_hook_h = c_size_t _setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int)) _setup_prototype(_ks, "ks_arch_supported", c_bool, c_int) _setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine)) _setup_prototype(_ks, "ks_close", kserr, ks_engine) _setup_prototype(_ks, "ks_strerror", c_char_p, kserr) _setup_prototype(_ks, "ks_errno", kserr, ks_engine) _setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p) _setup_prototype(_ks, "ks_asm", c_int, ks_engine, c_char_p, c_uint64, POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t)) _setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
from keystone import * ks = Ks(KS_ARCH_ARM, KS_MODE_ARM) code=b"sub r1, r2, r5" encoding, count =ks.asm(code) print("%s = [ " % code, end='') for i in encoding: print("%02x " % i, end='') print("]")
# Common instruction groups - to be consistent across all architectures.
# One assignment per line: when the whole listing sits on a single line that
# starts with '#', every assignment is part of the comment and nothing is bound.
CS_GRP_INVALID = 0    # uninitialized/invalid group.
CS_GRP_JUMP = 1       # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2       # all call instructions
CS_GRP_RET = 3        # all return instructions
CS_GRP_INT = 4        # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5       # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions
if (1 in i.groups and 2 not in i.groups): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) print("\t%s\n" %( i.groups))
if (1 in i.groups and 2 not in i.groups): print("0x%x:\t%s\t%s\n" %(i.address, i.mnemonic, i.op_str)) print("\t%s\n" %( i.groups))
# Common instruction operand types - to be consistent across all architectures. CS_OP_INVALID = 0 CS_OP_REG = 1 CS_OP_IMM = 2 CS_OP_MEM = 3 CS_OP_FP = 4
# Common instruction operand types - to be consistent across all architectures. CS_OP_INVALID = 0 CS_OP_REG = 1 CS_OP_IMM = 2 CS_OP_MEM = 3 CS_OP_FP = 4
[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课
赞赏
- cocos2d逆向入门和某捕鱼游戏分析 26607
- [原创]capstone2llvmir入门---如何把汇编转换为llvmir 20913
- [原创]利用编译器优化干掉控制流平坦化flatten 40631
- [求助][原创]利用编译器优化干掉虚假控制流 14970
- [求助][原创]对类抽取加固的一点尝试与遇到的问题 7912