-
-
[原创]菜鸟读capstone与keystone源码入门
-
发表于: 2020-3-29 17:08 8824
-
菜鸟最近想入门 OLLVM 分析,得先找个反汇编引擎。看了版主大佬的
《各种开源汇编、反汇编引擎的非专业比较》(https://bbs.pediy.com/thread-205590.htm)之后,决定从 capstone 入门。
搜了搜网上,可能这玩意太简单,大佬都是直接用,只有c的入门,没啥python的入门帖子,菜鸟对各种函数参数还是没搞太明白,只好自己看下源码,记录一下用法。。。
一、capstone 反汇编引擎 ,最重要功能是把二进制转化为汇编语言,关键代码 在capstone 包里的__init__.py
最重要的2个类Cs和CsInsn
Cs类的disasm是最重要的反汇编函数,我得先搞明白它的参数和返回
它调用了封装的 C 函数 cs_disasm:先通过 cs_disasm 解析 code,获得一个 all_insn 引用,然后通过 yield 逐个输出 CsInsn 的实例
看一下 CsInsn 类的 __init__ 可以看到:all_insn 是由 ctypes.POINTER(_cs_insn) 创建的指针,指向 _cs_insn 这个 C 结构体;其中的每个元素 all_insn[i] 作为 all_info 参数传入,经 copy_ctypes 处理后保存到 CsInsn 实例的 _raw 字段
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
# Disassemble binary & return disassembled instructions in CsInsn objects
def disasm(self, code, offset, count=0):
    """Disassemble ``code`` and lazily yield one ``CsInsn`` per instruction.

    Args:
        code: Raw machine code. A ``bytearray`` is converted to ``bytes``
            before it is handed to the C library.
        offset: Forwarded to ``cs_disasm``; presumably the address assigned
            to the first instruction -- confirm against the Capstone C API.
        count: Forwarded to ``cs_disasm``; presumably 0 means "decode
            everything" -- confirm against the Capstone C API.

    Yields:
        CsInsn: One wrapper per entry of the array filled in by ``cs_disasm``.

    Raises:
        CsError: If nothing was decoded and ``cs_errno`` reports a status
            other than ``CS_ERR_OK``.
    """
    # Null pointer to _cs_insn (the ctypes mirror of the C instruction
    # struct); cs_disasm() receives it via byref() as an out-parameter.
    all_insn = ctypes.POINTER(_cs_insn)()
    # Hack, unicorn's memory accessors give you back bytearrays, but they
    # cause TypeErrors when you hand them into Capstone.
    if isinstance(code, bytearray):
        code = bytes(code)
    res = _cs.cs_disasm(self.csh, code, len(code), offset, count,
                        ctypes.byref(all_insn))
    if res > 0:
        try:
            for i in range(res):
                # all_insn[i] becomes the ``all_info`` argument of CsInsn.
                yield CsInsn(self, all_insn[i])
        finally:
            # Runs when iteration finishes, when the generator is closed
            # early, or when an exception propagates.
            _cs.cs_free(all_insn, res)
    else:
        status = _cs.cs_errno(self.csh)
        if status != CS_ERR_OK:
            raise CsError(status)
def __init__(self, cs, all_info):
    """Wrap one decoded instruction.

    Args:
        cs: The engine object that produced the instruction; its ``_detail``
            flag decides whether the detail record is kept.
        all_info: The ``_cs_insn`` element handed over by ``disasm``.
    """
    # NOTE(review): copy_ctypes() presumably returns an independent copy of
    # the struct rather than a reference -- confirm in its definition.
    # address, mnemonic, op_str, ... are later read from this object.
    self._raw = copy_ctypes(all_info)
    self._cs = cs
    if self._cs._detail and self._raw.id != 0:
        # save detail: allocate a fresh detail struct and copy the native
        # one into it byte for byte.
        self._raw.detail = ctypes.pointer(all_info.detail._type_())
        ctypes.memmove(
            ctypes.byref(self._raw.detail[0]),
            ctypes.byref(all_info.detail[0]),
            ctypes.sizeof(type(all_info.detail[0])),
        )
# Defined first because the ``detail`` field of _cs_insn points to it.
class _cs_detail(ctypes.Structure):
    """ctypes layout of the per-instruction detail record."""

    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 12),
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),
    )


class _cs_insn(ctypes.Structure):
    """ctypes layout of one disassembled instruction."""

    _fields_ = (
        ('id', ctypes.c_uint),
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),
        ('bytes', ctypes.c_ubyte * 16),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 160),
        ('detail', ctypes.POINTER(_cs_detail)),
    )
def address(self):
    """Return the ``address`` field of the wrapped raw instruction."""
    return self._raw.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
# Example: "subs r0, r1, r0" has groups [150, 151], i.e. [thumb, thumb1only].

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0    # uninitialized/invalid group.
CS_GRP_JUMP = 1       # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2       # all call instructions
CS_GRP_RET = 3        # all return instructions
CS_GRP_INT = 4        # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5       # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions
# Fragment: ``i`` is presumably the CsInsn loop variable of a disasm() loop.
# Print instructions that are in group 1 (CS_GRP_JUMP) but not in group 2
# (CS_GRP_CALL), followed by their full group list.
if 1 in i.groups and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
# Android native code is usually ARM, so build a Cs instance for ARM/Thumb.
import capstone

# Build the input: read the whole shared library, closing the file afterwards.
with open('/src/main/lib/armeabi/libshell-super.2019.so', 'rb') as f:
    binary = f.read()
start = 0x0000307C
end = 0x00004df4

cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
cs.detail = True  # detail features are only available once this is enabled

for i in cs.disasm(binary[start:end], start):
    # Print address, mnemonic and operand string.
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    # Optional: print the groups and their names.
    # print("\t%s\n" % (i.groups))
    # for a in i.groups:
    #     print(i.group_name(a))
    # Print regs_read and regs_write, then the name of each register.
    print("0x%x:\t%s\t%s\n" % (i.address, i.regs_read, i.regs_write))
    for a in i.regs_read:
        print("regs_read:" + i.reg_name(a))
    for a in i.regs_write:
        print("regs_write:" + i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False):
    """Assemble ``string`` through the native ``ks_asm`` call.

    Args:
        string: Assembly source. A ``str`` is encoded to ASCII first;
            ``bytes`` are passed through unchanged.
        addr: Forwarded to ``ks_asm``; presumably the address of the first
            instruction -- confirm against the Keystone C API.
        as_bytes: If true the encoding is returned as ``bytes``, otherwise
            as a list with one item per encoded byte.

    Returns:
        ``(encoding, statement_count)``, or ``(None, 0)`` when ``ks_asm``
        reports zero statements.

    Raises:
        KsError: If ``ks_asm`` returns a non-zero status.
    """
    # Out-parameters that ks_asm() fills in through byref().
    encode = POINTER(c_ubyte)()
    encode_size = c_size_t()
    stat_count = c_size_t()
    if not isinstance(string, bytes) and isinstance(string, str):
        string = string.encode('ascii')

    status = _ks.ks_asm(self._ksh, string, addr, byref(encode),
                        byref(encode_size), byref(stat_count))
    if status != 0:
        errno = _ks.ks_errno(self._ksh)
        raise KsError(errno, stat_count.value)

    if stat_count.value == 0:
        # NOTE(review): ks_free() is not called on this path -- confirm that
        # the core allocates nothing when no statement was assembled.
        return (None, 0)

    # Copy the bytes out before the native buffer is released.
    if as_bytes:
        encoding = string_at(encode, encode_size.value)
    else:
        encoding = [encode[i] for i in range(encode_size.value)]

    _ks.ks_free(encode)
    return (encoding, stat_count.value)
def __init__(self, arch, mode):
    """Open a Keystone engine handle for ``arch`` / ``mode``.

    Raises:
        KsError: With ``KS_ERR_VERSION`` when the binding and core API
            versions differ, or with the status returned by ``ks_open``
            when that status is not ``KS_ERR_OK``.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = ks_version()
    if major != KS_API_MAJOR or minor != KS_API_MINOR:
        self._ksh = None
        # our binding version is different from the core's API version
        raise KsError(KS_ERR_VERSION)

    self._arch, self._mode = arch, mode
    # ks_open() receives the handle via byref() as an out-parameter.
    self._ksh = c_void_p()
    status = _ks.ks_open(arch, mode, byref(self._ksh))
    if status != KS_ERR_OK:
        self._ksh = None
        raise KsError(status)

    if arch == KS_ARCH_X86:
        # Intel syntax is default for X86
        self._syntax = KS_OPT_SYNTAX_INTEL
    else:
        self._syntax = None
# setup all the function prototype
def _setup_prototype(lib, fname, restype, *argtypes):
    """Set ``restype`` and ``argtypes`` on the function ``fname`` of ``lib``."""
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes


kserr = c_int
ks_engine = c_void_p
ks_hook_h = c_size_t

_setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int))
_setup_prototype(_ks, "ks_arch_supported", c_bool, c_int)
_setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine))
_setup_prototype(_ks, "ks_close", kserr, ks_engine)
_setup_prototype(_ks, "ks_strerror", c_char_p, kserr)
_setup_prototype(_ks, "ks_errno", kserr, ks_engine)
_setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p)
_setup_prototype(_ks, "ks_asm", c_int, ks_engine, c_char_p, c_uint64,
                 POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t))
_setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
from keystone import KS_ARCH_ARM, KS_MODE_ARM, Ks

# Assemble one ARM instruction and print its encoding as hex bytes.
ks = Ks(KS_ARCH_ARM, KS_MODE_ARM)
code = b"sub r1, r2, r5"
encoding, count = ks.asm(code)
print("%s = [ " % code, end='')
for i in encoding:
    print("%02x " % i, end='')
print("]")
# Defined first because the ``detail`` field of _cs_insn points to it.
class _cs_detail(ctypes.Structure):
    """ctypes layout of the per-instruction detail record."""

    _fields_ = (
        ('regs_read', ctypes.c_uint16 * 12),
        ('regs_read_count', ctypes.c_ubyte),
        ('regs_write', ctypes.c_uint16 * 20),
        ('regs_write_count', ctypes.c_ubyte),
        ('groups', ctypes.c_ubyte * 8),
        ('groups_count', ctypes.c_ubyte),
        ('arch', _cs_arch),
    )


class _cs_insn(ctypes.Structure):
    """ctypes layout of one disassembled instruction."""

    _fields_ = (
        ('id', ctypes.c_uint),
        ('address', ctypes.c_uint64),
        ('size', ctypes.c_uint16),
        ('bytes', ctypes.c_ubyte * 16),
        ('mnemonic', ctypes.c_char * 32),
        ('op_str', ctypes.c_char * 160),
        ('detail', ctypes.POINTER(_cs_detail)),
    )
def address(self):
    """Return the ``address`` field of the wrapped raw instruction."""
    return self._raw.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
# Example: "subs r0, r1, r0" has groups [150, 151], i.e. [thumb, thumb1only].

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0    # uninitialized/invalid group.
CS_GRP_JUMP = 1       # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2       # all call instructions
CS_GRP_RET = 3        # all return instructions
CS_GRP_INT = 4        # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5       # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions
# Fragment: ``i`` is presumably the CsInsn loop variable of a disasm() loop.
# Print instructions that are in group 1 (CS_GRP_JUMP) but not in group 2
# (CS_GRP_CALL), followed by their full group list.
if 1 in i.groups and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
# Android native code is usually ARM, so build a Cs instance for ARM/Thumb.
import capstone

# Build the input: read the whole shared library, closing the file afterwards.
with open('/src/main/lib/armeabi/libshell-super.2019.so', 'rb') as f:
    binary = f.read()
start = 0x0000307C
end = 0x00004df4

cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
cs.detail = True  # detail features are only available once this is enabled

for i in cs.disasm(binary[start:end], start):
    # Print address, mnemonic and operand string.
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    # Optional: print the groups and their names.
    # print("\t%s\n" % (i.groups))
    # for a in i.groups:
    #     print(i.group_name(a))
    # Print regs_read and regs_write, then the name of each register.
    print("0x%x:\t%s\t%s\n" % (i.address, i.regs_read, i.regs_write))
    for a in i.regs_read:
        print("regs_read:" + i.reg_name(a))
    for a in i.regs_write:
        print("regs_write:" + i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False):
    """Assemble ``string`` through the native ``ks_asm`` call.

    Args:
        string: Assembly source. A ``str`` is encoded to ASCII first;
            ``bytes`` are passed through unchanged.
        addr: Forwarded to ``ks_asm``; presumably the address of the first
            instruction -- confirm against the Keystone C API.
        as_bytes: If true the encoding is returned as ``bytes``, otherwise
            as a list with one item per encoded byte.

    Returns:
        ``(encoding, statement_count)``, or ``(None, 0)`` when ``ks_asm``
        reports zero statements.

    Raises:
        KsError: If ``ks_asm`` returns a non-zero status.
    """
    # Out-parameters that ks_asm() fills in through byref().
    encode = POINTER(c_ubyte)()
    encode_size = c_size_t()
    stat_count = c_size_t()
    if not isinstance(string, bytes) and isinstance(string, str):
        string = string.encode('ascii')

    status = _ks.ks_asm(self._ksh, string, addr, byref(encode),
                        byref(encode_size), byref(stat_count))
    if status != 0:
        errno = _ks.ks_errno(self._ksh)
        raise KsError(errno, stat_count.value)

    if stat_count.value == 0:
        # NOTE(review): ks_free() is not called on this path -- confirm that
        # the core allocates nothing when no statement was assembled.
        return (None, 0)

    # Copy the bytes out before the native buffer is released.
    if as_bytes:
        encoding = string_at(encode, encode_size.value)
    else:
        encoding = [encode[i] for i in range(encode_size.value)]

    _ks.ks_free(encode)
    return (encoding, stat_count.value)
def __init__(self, arch, mode):
    """Open a Keystone engine handle for ``arch`` / ``mode``.

    Raises:
        KsError: With ``KS_ERR_VERSION`` when the binding and core API
            versions differ, or with the status returned by ``ks_open``
            when that status is not ``KS_ERR_OK``.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = ks_version()
    if major != KS_API_MAJOR or minor != KS_API_MINOR:
        self._ksh = None
        # our binding version is different from the core's API version
        raise KsError(KS_ERR_VERSION)

    self._arch, self._mode = arch, mode
    # ks_open() receives the handle via byref() as an out-parameter.
    self._ksh = c_void_p()
    status = _ks.ks_open(arch, mode, byref(self._ksh))
    if status != KS_ERR_OK:
        self._ksh = None
        raise KsError(status)

    if arch == KS_ARCH_X86:
        # Intel syntax is default for X86
        self._syntax = KS_OPT_SYNTAX_INTEL
    else:
        self._syntax = None
# setup all the function prototype
def _setup_prototype(lib, fname, restype, *argtypes):
    """Set ``restype`` and ``argtypes`` on the function ``fname`` of ``lib``."""
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes


kserr = c_int
ks_engine = c_void_p
ks_hook_h = c_size_t

_setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int))
_setup_prototype(_ks, "ks_arch_supported", c_bool, c_int)
_setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine))
_setup_prototype(_ks, "ks_close", kserr, ks_engine)
_setup_prototype(_ks, "ks_strerror", c_char_p, kserr)
_setup_prototype(_ks, "ks_errno", kserr, ks_engine)
_setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p)
_setup_prototype(_ks, "ks_asm", c_int, ks_engine, c_char_p, c_uint64,
                 POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t))
_setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
from keystone import KS_ARCH_ARM, KS_MODE_ARM, Ks

# Assemble one ARM instruction and print its encoding as hex bytes.
ks = Ks(KS_ARCH_ARM, KS_MODE_ARM)
code = b"sub r1, r2, r5"
encoding, count = ks.asm(code)
print("%s = [ " % code, end='')
for i in encoding:
    print("%02x " % i, end='')
print("]")
def address(self):
    """Return the ``address`` field of the wrapped raw instruction."""
    return self._raw.address
举几个例子
blx #0x2274 的groups是[7, 150, 138, 149, 2, 1],分别代表[branch_relative,thumb,v5t,notmclass,call,jump]
# Example: "subs r0, r1, r0" has groups [150, 151], i.e. [thumb, thumb1only].

# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0    # uninitialized/invalid group.
CS_GRP_JUMP = 1       # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2       # all call instructions
CS_GRP_RET = 3        # all return instructions
CS_GRP_INT = 4        # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5       # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions
# Fragment: ``i`` is presumably the CsInsn loop variable of a disasm() loop.
# Print instructions that are in group 1 (CS_GRP_JUMP) but not in group 2
# (CS_GRP_CALL), followed by their full group list.
if 1 in i.groups and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
# Android native code is usually ARM, so build a Cs instance for ARM/Thumb.
import capstone

# Build the input: read the whole shared library, closing the file afterwards.
with open('/src/main/lib/armeabi/libshell-super.2019.so', 'rb') as f:
    binary = f.read()
start = 0x0000307C
end = 0x00004df4

cs = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
cs.detail = True  # detail features are only available once this is enabled

for i in cs.disasm(binary[start:end], start):
    # Print address, mnemonic and operand string.
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    # Optional: print the groups and their names.
    # print("\t%s\n" % (i.groups))
    # for a in i.groups:
    #     print(i.group_name(a))
    # Print regs_read and regs_write, then the name of each register.
    print("0x%x:\t%s\t%s\n" % (i.address, i.regs_read, i.regs_write))
    for a in i.regs_read:
        print("regs_read:" + i.reg_name(a))
    for a in i.regs_write:
        print("regs_write:" + i.reg_name(a))
def asm(self, string, addr=0, as_bytes=False):
    """Assemble ``string`` through the native ``ks_asm`` call.

    Args:
        string: Assembly source. A ``str`` is encoded to ASCII first;
            ``bytes`` are passed through unchanged.
        addr: Forwarded to ``ks_asm``; presumably the address of the first
            instruction -- confirm against the Keystone C API.
        as_bytes: If true the encoding is returned as ``bytes``, otherwise
            as a list with one item per encoded byte.

    Returns:
        ``(encoding, statement_count)``, or ``(None, 0)`` when ``ks_asm``
        reports zero statements.

    Raises:
        KsError: If ``ks_asm`` returns a non-zero status.
    """
    # Out-parameters that ks_asm() fills in through byref().
    encode = POINTER(c_ubyte)()
    encode_size = c_size_t()
    stat_count = c_size_t()
    if not isinstance(string, bytes) and isinstance(string, str):
        string = string.encode('ascii')

    status = _ks.ks_asm(self._ksh, string, addr, byref(encode),
                        byref(encode_size), byref(stat_count))
    if status != 0:
        errno = _ks.ks_errno(self._ksh)
        raise KsError(errno, stat_count.value)

    if stat_count.value == 0:
        # NOTE(review): ks_free() is not called on this path -- confirm that
        # the core allocates nothing when no statement was assembled.
        return (None, 0)

    # Copy the bytes out before the native buffer is released.
    if as_bytes:
        encoding = string_at(encode, encode_size.value)
    else:
        encoding = [encode[i] for i in range(encode_size.value)]

    _ks.ks_free(encode)
    return (encoding, stat_count.value)
def __init__(self, arch, mode):
    """Open a Keystone engine handle for ``arch`` / ``mode``.

    Raises:
        KsError: With ``KS_ERR_VERSION`` when the binding and core API
            versions differ, or with the status returned by ``ks_open``
            when that status is not ``KS_ERR_OK``.
    """
    # verify version compatibility with the core before doing anything
    (major, minor, _combined) = ks_version()
    if major != KS_API_MAJOR or minor != KS_API_MINOR:
        self._ksh = None
        # our binding version is different from the core's API version
        raise KsError(KS_ERR_VERSION)

    self._arch, self._mode = arch, mode
    # ks_open() receives the handle via byref() as an out-parameter.
    self._ksh = c_void_p()
    status = _ks.ks_open(arch, mode, byref(self._ksh))
    if status != KS_ERR_OK:
        self._ksh = None
        raise KsError(status)

    if arch == KS_ARCH_X86:
        # Intel syntax is default for X86
        self._syntax = KS_OPT_SYNTAX_INTEL
    else:
        self._syntax = None
# setup all the function prototype
def _setup_prototype(lib, fname, restype, *argtypes):
    """Set ``restype`` and ``argtypes`` on the function ``fname`` of ``lib``."""
    func = getattr(lib, fname)
    func.restype = restype
    func.argtypes = argtypes


kserr = c_int
ks_engine = c_void_p
ks_hook_h = c_size_t

_setup_prototype(_ks, "ks_version", c_uint, POINTER(c_int), POINTER(c_int))
_setup_prototype(_ks, "ks_arch_supported", c_bool, c_int)
_setup_prototype(_ks, "ks_open", kserr, c_uint, c_uint, POINTER(ks_engine))
_setup_prototype(_ks, "ks_close", kserr, ks_engine)
_setup_prototype(_ks, "ks_strerror", c_char_p, kserr)
_setup_prototype(_ks, "ks_errno", kserr, ks_engine)
_setup_prototype(_ks, "ks_option", kserr, ks_engine, c_int, c_void_p)
_setup_prototype(_ks, "ks_asm", c_int, ks_engine, c_char_p, c_uint64,
                 POINTER(POINTER(c_ubyte)), POINTER(c_size_t), POINTER(c_size_t))
_setup_prototype(_ks, "ks_free", None, POINTER(c_ubyte))
from keystone import KS_ARCH_ARM, KS_MODE_ARM, Ks

# Assemble one ARM instruction and print its encoding as hex bytes.
ks = Ks(KS_ARCH_ARM, KS_MODE_ARM)
code = b"sub r1, r2, r5"
encoding, count = ks.asm(code)
print("%s = [ " % code, end='')
for i in encoding:
    print("%02x " % i, end='')
print("]")
# Common instruction groups - to be consistent across all architectures.
CS_GRP_INVALID = 0    # uninitialized/invalid group.
CS_GRP_JUMP = 1       # all jump instructions (conditional+direct+indirect jumps)
CS_GRP_CALL = 2       # all call instructions
CS_GRP_RET = 3        # all return instructions
CS_GRP_INT = 4        # all interrupt instructions (int+syscall)
CS_GRP_IRET = 5       # all interrupt return instructions
CS_GRP_PRIVILEGE = 6  # all privileged instructions
# Fragment: ``i`` is presumably the CsInsn loop variable of a disasm() loop.
# Print instructions that are in group 1 (CS_GRP_JUMP) but not in group 2
# (CS_GRP_CALL), followed by their full group list.
if 1 in i.groups and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Fragment: ``i`` is presumably the CsInsn loop variable of a disasm() loop.
# Print instructions that are in group 1 (CS_GRP_JUMP) but not in group 2
# (CS_GRP_CALL), followed by their full group list.
if 1 in i.groups and 2 not in i.groups:
    print("0x%x:\t%s\t%s\n" % (i.address, i.mnemonic, i.op_str))
    print("\t%s\n" % (i.groups))
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
# Common instruction operand types - to be consistent across all architectures.
CS_OP_INVALID = 0
CS_OP_REG = 1
CS_OP_IMM = 2
CS_OP_MEM = 3
CS_OP_FP = 4
[招生]科锐逆向工程师培训(2025年3月11日实地,远程教学同时开班, 第52期)!
赞赏
- cocos2d逆向入门和某捕鱼游戏分析 27346
- [原创]capstone2llvmir入门---如何把汇编转换为llvmir 21459
- [原创]利用编译器优化干掉控制流平坦化flatten 41108
- [求助][原创]利用编译器优化干掉虚假控制流 15356
- [求助][原创]对类抽取加固的一点尝试与遇到的问题 8186