用IPHONE看小说,时间稍长眼睛会酸。换到Kindle上,又在各种错别字、跳段、重复中纠结。
想想咱以前也是搞过游戏汉化的,不就是把文本从IPHONE版本的程序里拉出来嘛...
首先在程序安装目录里一通找,以我看的这本小说《斗破苍穹》为例,发现文本位于目录:
/var/mobile/Applications/7DA01F1E-87FE-4DAC-9597-3174208BB7C4/CRForEPub-Reader.app/UnzippedEpub
/var/mobile/Applications/7DA01F1E-87FE-4DAC-9597-3174208BB7C4/CRForEPub-Reader.app/UnzippedEpub/1/OEBPS/Text root# ls
01.xhtml.encrypt* 17.xhtml.encrypt* 34.xhtml.encrypt* 51.xhtml.encrypt* 68.xhtml.encrypt* 85.xhtml.encrypt*
02.xhtml.encrypt* 18.xhtml.encrypt* 35.xhtml.encrypt* 52.xhtml.encrypt* 69.xhtml.encrypt* 86.xhtml.encrypt*
03.xhtml.encrypt* 19.xhtml.encrypt* 36.xhtml.encrypt* 53.xhtml.encrypt* 70.xhtml.encrypt* 87.xhtml.encrypt*
04.xhtml.encrypt* 20.xhtml.encrypt* 37.xhtml.encrypt* 54.xhtml.encrypt* 71.xhtml.encrypt* 88.xhtml.encrypt*
05.xhtml.encrypt* 21.xhtml.encrypt* 38.xhtml.encrypt* 55.xhtml.encrypt* 72.xhtml.encrypt* 89.xhtml.encrypt*
06.xhtml.encrypt* 22.xhtml.encrypt* 39.xhtml.encrypt* 56.xhtml.encrypt* 73.xhtml.encrypt* 90.xhtml.encrypt*
07.xhtml.encrypt* 23.xhtml.encrypt* 40.xhtml.encrypt* 57.xhtml.encrypt* 74.xhtml.encrypt* 91.xhtml.encrypt*
08.xhtml.encrypt* 24.xhtml.encrypt* 41.xhtml.encrypt* 58.xhtml.encrypt* 75.xhtml.encrypt* 92.xhtml.encrypt*
09.xhtml.encrypt* 25.xhtml.encrypt* 42.xhtml.encrypt* 59.xhtml.encrypt* 76.xhtml.encrypt* 93.xhtml.encrypt*
10.xhtml.encrypt* 26.xhtml.encrypt* 43.xhtml.encrypt* 60.xhtml.encrypt* 77.xhtml.encrypt* 94.xhtml.encrypt*
100.xhtml.encrypt* 27.xhtml.encrypt* 44.xhtml.encrypt* 61.xhtml.encrypt* 78.xhtml.encrypt* 95.xhtml.encrypt*
11.xhtml.encrypt* 28.xhtml.encrypt* 45.xhtml.encrypt* 62.xhtml.encrypt* 79.xhtml.encrypt* 96.xhtml.encrypt*
12.xhtml.encrypt* 29.xhtml.encrypt* 46.xhtml.encrypt* 63.xhtml.encrypt* 80.xhtml.encrypt* 97.xhtml.encrypt*
13.xhtml.encrypt* 30.xhtml.encrypt* 47.xhtml.encrypt* 64.xhtml.encrypt* 81.xhtml.encrypt* 98.xhtml.encrypt*
14.xhtml.encrypt* 31.xhtml.encrypt* 48.xhtml.encrypt* 65.xhtml.encrypt* 82.xhtml.encrypt* 99.xhtml.encrypt*
15.xhtml.encrypt* 32.xhtml.encrypt* 49.xhtml.encrypt* 66.xhtml.encrypt* 83.xhtml.encrypt* copyright.xhtml.encrypt*
16.xhtml.encrypt* 33.xhtml.encrypt* 50.xhtml.encrypt* 67.xhtml.encrypt* 84.xhtml.encrypt*
通过文件名后缀我们得知,接下来的工作就是解密文本。
祭出IDA,打开主程序CRForEPub-Reader,首先感叹一下这么完整的符号表。然后目测到解密函数:
/*
 * Decompiler pseudocode for +[CREPubManager decryptedContentForChapter:].
 * Returns an autoreleased NSString built from the decrypted chapter data,
 * or 0 when the encrypted file does not exist or decryption yields no bytes.
 * NOTE(review): the decompiler shows only the receiver/selector for the
 * first objc_msgSend; the chapter argument is presumably passed as well.
 */
void *__fastcall _CREPubManager_decryptedContentForChapter__(void *a1)
{
void *v1; // r5@1
void *v2; // r4@1
void *v3; // r0@2
void *v4; // r4@2
void *v5; // r0@3
void *v6; // r0@3
v1 = 0;
/* v2: path of the encrypted chapter file. */
v2 = objc_msgSend(a1, "encryptedFilePathForChapter:");
/* Only the low byte of the result is tested (the call returns a BOOL-sized value). */
if ( (unsigned int)objc_msgSend(&OBJC_CLASS___FileUtil, "fileExistsAtPath:", v2) & 0xFF )
{
v1 = 0;
/* v3: raw file contents. */
v3 = objc_msgSend(&OBJC_CLASS___NSData, "dataWithContentsOfFile:", v2);
/* 0x5A654 is passed as the key and 0 as the IV.
   NOTE(review): 0x5A654 is presumably the address of the key string -- confirm in the binary. */
v4 = objc_msgSend(v3, "aesDecryptWithKey:initialVector:", 0x5A654, 0);
/* A zero-length result is treated as failure and leaves v1 == 0. */
if ( objc_msgSend(v4, "length") )
{
v5 = objc_msgSend(&OBJC_CLASS___NSString, "alloc");
/* NOTE(review): encoding value 4 is presumably NSUTF8StringEncoding -- confirm. */
v6 = objc_msgSend(v5, "initWithData:encoding:", v4, 4);
v1 = objc_msgSend(v6, "autorelease");
}
}
return v1;
}
哦,原来用的是aes,一路追下来找到最终调用的库函数CCCrypt
CCCryptorStatus CCCrypt(CCOperation op, CCAlgorithm alg, CCOptions options, const void *key, size_t keyLength,
const void *iv, const void *dataIn, size_t dataInLength, void *dataOut, size_t dataOutAvailable,
size_t *dataOutMoved);
温习下参数传递规则:R0,R1,R2,R3四个寄存器进行传递;若形参个数大于4,大于4的部分必须通过堆栈进行传递。
gdb在CCCrypt下断点:
(gdb) bt
#0 0x3a1db3c4 in CCCrypt ()
#1 0x00010c0a in -[NSData(NSDataExtension) aesDecryptWithKey:initialVector:] ()
#2 0x00010498 in +[CREPubManager decryptedContentForChapter:] ()
#3 0x0002b08c in -[CRDecryptChapterContent excute:] ()
#4 0x000146a4 in -[BasicAsyncCommand process] ()
#5 0x320ef9c4 in <redacted> ()
#6 0x32046fea in <redacted> ()
#7 0x329efb64 in <redacted> ()
#8 0x329795c0 in <redacted> ()
#9 0x329f1be2 in <redacted> ()
#10 0x3a22111e in <redacted> ()
#11 0x3a225960 in <redacted> ()
#12 0x3a225ac0 in <redacted> ()
#13 0x3a255a10 in <redacted> ()
#14 0x3a2558a4 in start_wqthread ()
(gdb) i r
r0 0x1 1
r1 0x0 0
r2 0x3 3
r3 0x2ff1eca4 804383908
r4 0x2ff1eca4 804383908
r5 0xa81000 11014144
r6 0x37baa9e3 934980067
r7 0x2ff1ecec 804383980
r8 0xa83800 11024384
r9 0x19 25
r10 0x2810 10256
r11 0x1bef80 1830784
r12 0x32962881 848701569
sp 0x2ff1ec88 804383880
lr 0x10c0b 68619
pc 0x3a1db3c4 975025092
cpsr {0x60000030, n = 0x0, z = 0x1, c = 0x1, v = 0x0, q = 0x0, j = 0x0, ge = 0x0, e = 0x0, a = 0x0, i = 0x0, f = 0x0,
t = 0x1, mode = 0x10} {0x60000030, n = 0, z = 1, c = 1, v = 0, q = 0, j = 0, ge = 0, e = 0, a = 0, i = 0, f = 0, t = 1,
mode = usr}
(gdb) x /10xw $sp
0x2ff1ec88: 0x00000018 0x00000000 0x00a81000 0x00002800
0x2ff1ec98: 0x00a83800 0x00002810 0x2ff1eccc 0x61646e73
0x2ff1eca8: 0x6f6c632e 0x72616475
(gdb) p (char *)$r3
$1 = 0x2ff1eca4 "snda.cloudary.epubreader"
于是我们知道函数的参数是这样填写的:
CCCrypt(1, 0, 3, "snda.cloudary.epubreader", 0x18, NULL, 0x00a81000, 0x00002800, 0x00a83800, 0x00002810, 0x2ff1eccc);
在inbuf里可以看到内容和xhtml.encrypt文件中一致。等函数CCCrypt执行完,检查outbuf:
(gdb) p (char *)0x00a83800
$2 = 0xa83800 "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\r\n \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\r\n\r\n<html xmlns=\"http://www.w3.org/1999/xhtm"...
(gdb) p (char *)0x00a83800+1000
$3 = 0xa83be8 "?一阵嘲讽的骚动。</p>\r\n\r\n <p>“三段?嘿嘿,果然不出我所料,这个“天才”这一年又是在原地踏步!”</p>\r\n\r\n <p>“哎,这废物真是把家族的脸都给丢兢...
(gdb)
呵呵,看到明文了。
然后把下面的程序放到IPHONE上,在存放.encrypt文件的目录里运行,out目录里就是解密后的文本。
/*
* decryptedContent.c
* arm-apple-darwin9-gcc decryptedContent.c -o dc
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/stat.h>
/*
 * Short aliases for unsigned integer types. u8 is the byte type used for the
 * file buffers below; u32 and u16 are not referenced by the code visible here.
 * NOTE(review): the 32/16/8-bit widths implied by the names assume the target
 * ABI's sizes for int/short/char -- confirm for the toolchain in use.
 */
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
/*
 * Open `name` for binary reading and report its length.
 *
 * name: path of the file to open.
 * size: out parameter; receives the file length in bytes on success.
 *
 * Returns the open stream positioned at offset 0, or NULL on failure
 * (the file cannot be opened, its length cannot be determined, or the
 * length does not fit in an int). The stream is closed on every failure
 * path, so the caller owns it only when the return value is non-NULL.
 */
FILE *open_file(char *name, int *size)
{
    FILE *fp;
    long len;

    fp = fopen(name, "rb");
    if (fp == NULL) {
        perror("open_file");
        return NULL;
    }

    /* Measure the file by seeking to its end; every step can fail. */
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("open_file");
        fclose(fp);
        return NULL;
    }
    len = ftell(fp);
    if (len < 0) {
        perror("open_file");
        fclose(fp);
        return NULL;
    }
    /* The interface reports the size as int: reject lengths that do not round-trip. */
    if ((long)(int)len != len) {
        printf("open_file: %s is too large\n", name);
        fclose(fp);
        return NULL;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        perror("open_file");
        fclose(fp);
        return NULL;
    }

    *size = (int)len;
    return fp;
}
/*
 * Read the whole of file `name` into a freshly allocated buffer.
 *
 * name: path of the file to read.
 * size: out parameter; receives the number of bytes in the file.
 *
 * Returns the buffer (to be released by the caller with free()), or NULL
 * if the file cannot be opened, memory cannot be allocated, or the read
 * comes up short. An empty file yields a valid non-NULL buffer with
 * *size == 0.
 */
u8 *load_file(char *name, int *size)
{
    FILE *fp;
    u8 *buf;

    fp = open_file(name, size);
    if (fp == NULL)
        return NULL;

    if (*size < 0) {
        /* The length could not be determined; nothing sensible to read. */
        fclose(fp);
        return NULL;
    }

    /* malloc(0) may return NULL; always request at least one byte so that
       an empty file is not mistaken for an allocation failure. */
    buf = malloc(*size > 0 ? (size_t)*size : 1);
    if (buf == NULL) {
        perror("load_file");
        fclose(fp);
        return NULL;
    }

    if (*size > 0 && fread(buf, (size_t)*size, 1, fp) != 1) {
        printf("Read file %s failed!\n", name);
        free(buf);
        fclose(fp);
        return NULL;
    }

    fclose(fp);
    return buf;
}
/*
 * Write `size` bytes from `buf` to file `name`, replacing any existing file.
 *
 * name: path of the file to create.
 * buf:  data to write.
 * size: number of bytes to write; 0 creates an empty file.
 *
 * Returns 0 on success, -1 if size is negative, the file cannot be opened,
 * or the data cannot be written out completely (including a failing
 * fclose, which is where buffered write errors surface).
 */
int save_file(char *name, void *buf, int size)
{
    FILE *fp;
    int rc = 0;

    if (size < 0) {
        printf("Invalid size %d for file %s!\n", size, name);
        return -1;
    }

    fp = fopen(name, "wb");
    if (fp == NULL) {
        printf("Open file %s failed!\n", name);
        return -1;
    }

    /* fwrite with a zero element size returns 0, so only check real writes. */
    if (size > 0 && fwrite(buf, (size_t)size, 1, fp) != 1) {
        printf("Write file %s failed!\n", name);
        rc = -1;
    }
    if (fclose(fp) != 0) {
        if (rc == 0)
            printf("Write file %s failed!\n", name);
        rc = -1;
    }
    return rc;
}
/*
 * Decrypt one "*.encrypt" file and write the plaintext to "out/<name>",
 * where <name> is infname with the trailing ".encrypt" removed.
 *
 * infname: path of the candidate file. Files whose name does not end in
 *          ".encrypt" are ignored.
 *
 * Returns 0 when the file was skipped or decrypted and saved, -1 on any
 * failure (path too long, read error, out of memory, decryption error,
 * write error).
 *
 * NOTE(review): CCCrypt is declared in <CommonCrypto/CommonCryptor.h>, which
 * this file does not include -- confirm the build picks up a declaration.
 * NOTE(review): the output path keeps any directory part of infname, and
 * "out/<dir>" is not created here -- confirm callers pass bare file names.
 */
int process_file(char *infname)
{
    static const char suffix[] = ".encrypt";
    const size_t suffix_len = sizeof(suffix) - 1;
    char outfname[256];
    u8 *inbuf = NULL;
    u8 *outbuf = NULL;
    size_t decsize = 0;
    size_t namelen;
    int filesize = 0;
    int n;
    int status;
    int rc = -1;

    /* Only handle names that really end in ".encrypt". */
    namelen = strlen(infname);
    if (namelen <= suffix_len ||
        strcmp(infname + namelen - suffix_len, suffix) != 0)
        return 0;

    printf("process [%s]\n", infname);

    /* Build "out/<name without suffix>" with a bounds check. */
    n = snprintf(outfname, sizeof outfname, "out/%.*s",
                 (int)(namelen - suffix_len), infname);
    if (n < 0 || (size_t)n >= sizeof outfname) {
        printf("Path too long: %s\n", infname);
        return -1;
    }

    inbuf = load_file(infname, &filesize);
    if (inbuf == NULL)
        return -1;

    /* One extra cipher block of head-room, as in the observed call
       (input 0x2800 bytes, output buffer 0x2810 bytes). */
    outbuf = malloc((size_t)filesize + 16);
    if (outbuf == NULL) {
        perror("process_file");
        goto done;
    }

    /*
     * Same arguments as captured in the debugger: op 1 (kCCDecrypt),
     * alg 0 (kCCAlgorithmAES128), options 3 (PKCS7 padding | ECB mode),
     * 0x18-byte key, no IV. A status of 0 is kCCSuccess.
     */
    status = CCCrypt(1, 0, 3, "snda.cloudary.epubreader", 0x18, NULL,
                     inbuf, filesize, outbuf, filesize + 16, &decsize);
    if (status != 0) {
        printf("Decrypt %s failed! (status %d)\n", infname, status);
        goto done;
    }

    if (save_file(outfname, outbuf, (int)decsize) != 0)
        goto done;

    rc = 0;

done:
    free(outbuf);
    free(inbuf);
    return rc;
}
/*
 * Recursively walk `dname`, handing every non-directory to process_file().
 *
 * dname: a file or directory path. A non-directory is passed straight to
 *        process_file(). For a directory, the entry names are snapshotted
 *        first and the directory is closed before recursing, so files
 *        created during processing do not disturb the walk and nested
 *        directories do not pile up open handles.
 *
 * Entries found under "." are passed on as bare names; entries under any
 * other directory are passed as "<dname>/<entry>".
 *
 * Returns process_file()'s result for a non-directory. For a directory,
 * returns 0 once the walk completes (failures of individual entries are
 * reported by the callee and do not stop the walk), or -1 if the path
 * cannot be stat'ed, the directory cannot be opened, or memory runs out.
 */
int process_dir(char *dname)
{
    DIR *pdir;
    struct dirent *d;
    struct stat statbuf;
    char fname[256];
    char **names = NULL;
    size_t count = 0;
    size_t cap = 0;
    size_t i;
    int n;
    int rc = 0;

    if (stat(dname, &statbuf) != 0) {
        perror(dname);
        return -1;
    }
    if (!S_ISDIR(statbuf.st_mode))
        return process_file(dname);

    pdir = opendir(dname);
    if (pdir == NULL) {
        printf("Can't open directory <%s>\n", dname);
        return -1;
    }

    /* Snapshot the entry names in a single pass, skipping "." and "..". */
    while ((d = readdir(pdir)) != NULL) {
        const char *nm = d->d_name;
        size_t len;
        char *copy;

        if (nm[0] == '.' &&
            (nm[1] == '\0' || (nm[1] == '.' && nm[2] == '\0')))
            continue;

        if (count == cap) {
            size_t ncap = cap ? cap * 2 : 64;
            char **tmp = realloc(names, ncap * sizeof *tmp);
            if (tmp == NULL) {
                rc = -1;
                break;
            }
            names = tmp;
            cap = ncap;
        }

        len = strlen(nm) + 1;
        copy = malloc(len);
        if (copy == NULL) {
            rc = -1;
            break;
        }
        memcpy(copy, nm, len);
        names[count++] = copy;
    }
    closedir(pdir);

    if (rc != 0) {
        printf("Out of memory while reading directory <%s>\n", dname);
    } else {
        for (i = 0; i < count; i++) {
            /* Only the current directory itself gets prefix-free names;
               "..", "./x" or hidden directories must keep their prefix. */
            if (strcmp(dname, ".") == 0)
                n = snprintf(fname, sizeof fname, "%s", names[i]);
            else
                n = snprintf(fname, sizeof fname, "%s/%s", dname, names[i]);

            if (n < 0 || (size_t)n >= sizeof fname) {
                printf("Path too long, skipped: %s/%s\n", dname, names[i]);
                continue;
            }
            process_dir(fname);
        }
    }

    for (i = 0; i < count; i++)
        free(names[i]);
    free(names);
    return rc;
}
/*
 * Entry point: create the "out" directory, then process the path given as
 * the first argument, or the current directory when no argument is given.
 *
 * Returns 0 on success, 1 if "out" cannot be created or the top-level
 * walk fails.
 */
int main(int argc, char *argv[])
{
    struct stat st;
    int rc;

    /* rwxr-xr-x: the owner must be able to write into the directory
       (S_IRWXO alone would grant access to "others" only). */
    if (mkdir("out", S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) {
        /* An already existing directory is fine; anything else is fatal. */
        if (stat("out", &st) != 0 || !S_ISDIR(st.st_mode)) {
            printf("Can't create directory <out>\n");
            return 1;
        }
    }

    rc = process_dir(argc == 1 ? "." : argv[1]);
    return rc == 0 ? 0 : 1;
}
xhtml文件的编号有时会错乱,例如第14卷书的内容如下:
$ ls
662.xhtml 679.xhtml 696.xhtml 813.xhtml 830.xhtml 847.xhtml
663.xhtml 680.xhtml 697.xhtml 814.xhtml 831.xhtml 848.xhtml
664.xhtml 681.xhtml 698.xhtml 815.xhtml 832.xhtml 849.xhtml
665.xhtml 682.xhtml 699.xhtml 816.xhtml 833.xhtml 850.xhtml
666.xhtml 683.xhtml 700.xhtml 817.xhtml 834.xhtml 851.xhtml
667.xhtml 684.xhtml 801.xhtml 818.xhtml 835.xhtml 852.xhtml
668.xhtml 685.xhtml 802.xhtml 819.xhtml 836.xhtml 853.xhtml
669.xhtml 686.xhtml 803.xhtml 820.xhtml 837.xhtml 854.xhtml
670.xhtml 687.xhtml 804.xhtml 821.xhtml 838.xhtml 855.xhtml
671.xhtml 688.xhtml 805.xhtml 822.xhtml 839.xhtml 856.xhtml
672.xhtml 689.xhtml 806.xhtml 823.xhtml 840.xhtml 857.xhtml
673.xhtml 690.xhtml 807.xhtml 824.xhtml 841.xhtml 858.xhtml
674.xhtml 691.xhtml 808.xhtml 825.xhtml 842.xhtml 859.xhtml
675.xhtml 692.xhtml 809.xhtml 826.xhtml 843.xhtml 860.xhtml
676.xhtml 693.xhtml 810.xhtml 827.xhtml 844.xhtml 861.xhtml
677.xhtml 694.xhtml 811.xhtml 828.xhtml 845.xhtml copyright.xhtml
678.xhtml 695.xhtml 812.xhtml 829.xhtml 846.xhtml
其中copyright.xhtml为书顶部的介绍,内容:
=================================================================================================================
版权信息
书名:斗破苍穹(第1301-1400章)
作者:天蚕土豆
字数:本册32万字,全书共532万字
本书自2009年4月起在盛大文学旗下起点中文网连载,后由湖北少年儿童出版社有限公司出版纸版,电子版由云中书城制作发行。
电子版次:2.0
更新时间:2012年4月
版权所有 侵权必究
=================================================================================================================
其中:662表示1362章;801表示1301章;700表示1400章
于是写个程序,copyright.xhtml改名成000.xhtml;700.xhtml改名成100.xhtml;其余情况改名成0xx.xhtml即可。
/*
 * Rename one decrypted chapter file so that names sort in reading order.
 *
 * infname: bare file name in the current directory.
 *
 *   copyright.xhtml        -> 000.xhtml
 *   X00.xhtml (X != 0)     -> 100.xhtml   (e.g. 700.xhtml; the 100th chapter)
 *   Xyz.xhtml (X != 0)     -> 0yz.xhtml   (e.g. 662.xhtml -> 062.xhtml)
 *
 * Names that do not end in ".xhtml", are not "copyright.xhtml" or exactly
 * three digits plus ".xhtml", or already start with '0', are left alone,
 * so running the tool a second time changes nothing.
 *
 * Returns 0 when the file was renamed or skipped, -1 if rename() fails.
 */
int process_file(char *infname)
{
    static const char ext[] = ".xhtml";
    const size_t extlen = sizeof(ext) - 1;
    char outfname[sizeof "000.xhtml"];
    const char *target;
    size_t len = strlen(infname);

    if (len <= extlen || strcmp(infname + len - extlen, ext) != 0)
        return 0;

    printf("process [%s]\n", infname);

    if (strcmp(infname, "copyright.xhtml") == 0) {
        target = "000.xhtml";
    } else if (len == 3 + extlen &&
               infname[0] >= '1' && infname[0] <= '9' &&
               infname[1] >= '0' && infname[1] <= '9' &&
               infname[2] >= '0' && infname[2] <= '9') {
        if (infname[1] == '0' && infname[2] == '0') {
            /* "X00" is the last chapter of the volume; mapping it to 000
               would collide with the copyright page. */
            target = "100.xhtml";
        } else {
            memcpy(outfname, infname, len + 1);
            outfname[0] = '0';
            target = outfname;
        }
    } else {
        /* Not a name this scheme knows how to normalise. */
        return 0;
    }

    if (strcmp(infname, target) == 0)
        return 0;

    if (rename(infname, target) != 0) {
        perror(infname);
        return -1;
    }
    return 0;
}
最后通过工具TextForever.exe,先Html->Text,然后文件合并,得到最终的文本。
[注意]传递专业知识、拓宽行业人脉——看雪讲师团队等你加入!