;Range-check both bytes of AX as a double-byte character: AH must lie in
;40..FE and AL in 81..FE (signed compares, bare literals are hex). Any
;failure goes to @jj1001; on success execution also falls into @jj1001.
;Presumably these are GBK-style trail/lead byte ranges - TODO confirm.
;NOTE(review): only CL is written before each compare of ECX, so this relies
;on the upper 24 bits of ECX already being zero - confirm against the code
;that precedes this fragment.
mov cl,ah
cmp ecx,fe
jg short @jj1001
cmp ecx,40
jl short @jj1001
;check the low byte
mov cl,al
cmp ecx,fe
jg short @jj1001
cmp ecx,81
jl short @jj1001
;reaching here means the pair is ANSI (original note spelled it "asni")
mov ecx,1;the check for two consecutive symbols is added here
;Alphanumeric test on the 16-bit value in AX (copied into CX):
;  30..39 ('0'-'9'), 41..5A ('A'-'Z'), 61..7A ('a'-'z')  (hex, inclusive).
;A hit jumps to @lj1158; otherwise execution falls into @jj1004.
;The compares use the full ECX, so a hit requires the whole word (and the
;upper half of ECX, which is not written here) to fit the range.
@jj1001:
;exclude the case where both of two consecutive units are symbols
mov cx,ax
cmp ecx,39
jg short @jj1002
cmp ecx,30
jl short @jj1002
;jump away
jmp short @lj1158
@jj1002:
cmp ecx,5a
jg short @jj1003
cmp ecx,41
jl short @jj1003
;jump away
jmp short @lj1158
@jj1003:
cmp ecx,7a
jg short @jj1004
cmp ecx,61
jl short @jj1004
;jump away
jmp short @lj1158
;Second alphanumeric test: shift EAX right by 8 bits, copy the new AX into CX
;and compare ECX against the same three ranges as the first test:
;  30..39 ('0'-'9'), 41..5A ('A'-'Z'), 61..7A ('a'-'z')  (hex, inclusive).
;A hit jumps to @lj1158; if no range matches, control reaches @jj1007 and
;jumps to @l5.
;Note: EAX is modified by the shift and is not restored in this section.
@jj1004:
shr eax,8
mov cx,ax
cmp ecx,39
jg short @jj1005
cmp ecx,30
jl short @jj1005
;digit: jump away
jmp short @lj1158
@jj1005:
cmp ecx,5a
jg short @jj1006
cmp ecx,41
jl short @jj1006
;upper-case letter: jump away
jmp short @lj1158
@jj1006:
cmp ecx,7a
jg short @jj1007
cmp ecx,61
jl short @jj1007
;lower-case letter: jump away
jmp short @lj1158
@jj1007:
;reaching here means both units are symbol ASCII characters
jmp short @l5
;Target of the alphanumeric hits above: store 1 into the byte at [EDX],
;then continue at @l5.
@lj1158:
mov byte [edx],1
jmp short @l5;exit the loop
;Handles input whose AL is 55 ('U'); anything else goes straight to @l5.
;If the byte at [5fa8cb] is non-zero, or the byte obtained after shifting EAX
;right by 10 (presumably hex, i.e. 16 bits) is not 4E ('N'), control goes to
;@k1741, which stores 1 into [EDX].
;Otherwise the dwords at [ECX] and [ECX+2] are each required to have
;AL in 20..7E and AH equal to 0; any failure skips the store via @l5.
;The AL compares are signed, so bytes 80..FF take the jl branch to @l5.
@l2:
cmp al,0x55
jne short @l5;strings that start with 'U' are kept here
cmp byte [5fa8cb],0
jne short @k1741
;distinguish UTF from ordinary Unicode here
;cmp byte [edx+2],1
;je @l5
;mov byte [edx-1],0
;mov byte [edx-2],0
shr eax,10
cmp al,4e
jne short @k1741
mov eax, [ecx]
cmp al,7e
jg short @l5;values below 20 are dropped (this branch rejects values above 7E)
cmp al,20
jl short @l5
cmp ah,0
jne short @l5
;above: check whether the first unit is ASCII
;below: check whether the second unit is ASCII
mov eax,[ecx+2]
cmp al,7e
jg short @l5;values below 20 are dropped (this branch rejects values above 7E)
cmp al,20
jl short @l5
cmp ah,0
jne short @l5
@k1741:
mov byte [edx],1;edx=5fa8d0 here
;Common exit of the checks above. Pops ECX (the matching push is outside this
;view), then post-processes the flag bytes addressed through EDX:
;  - if [5fa8cb] is 0 and [EDX+2] is 1, the byte at [EDX] is cleared;
;  - [EDX+2] is always cleared;
;  - if [5fa8cb] is 0, control leaves to @l9;
;  - otherwise, if [EDX-2] is 1, the byte at [EDX] is cleared.
@l5:
pop ecx
cmp byte [5fa8cb],0
jne short @ls1126
cmp byte [edx+2],1;edx=5fa8d0
jne short @ls1126
mov byte [edx],0
@ls1126:
mov byte [edx+2],0
cmp byte [5fa8cb],0
je short @l9
;cmp byte [edx-1],1 ; character-search flag [005FA8cf]
;jne short @l10
;mov byte [edx],0
;@l10:
cmp byte [edx-2],1; garbled-text flag (original note says [005fabce]; with edx=5fa8d0 this is 5fa8ce)
jne short @l102
mov byte [edx],0
;Classifies the value in EAX against a chain of inclusive ranges (signed
;compares): 3001..301F, 2460..24FE, 2200..22F0, 2100..2130, 2160..2199.
;A range hit increments EDX and adds 1 to the byte at [EBP-4].
;If no range matches, EAX equal to 0D or 0A increments EDX and adds 2 to the
;byte at [EBP-4]. Every path ends at @asco1159.
;Presumably the ranges are Unicode symbol blocks - TODO confirm.
;NOTE(review): the instructions right before @l102 use EDX as an address
;([edx], [edx-2]) while this section increments it as a counter - confirm
;which role EDX has on the paths that reach this label.
@l102:
;compare against some symbol ranges
cmp eax,0x3001
jl short @asc12011
cmp eax ,0x301f
jg short @asc12011
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],1
jmp short @asco1159;leave the checks
@asc12011:
cmp eax,0x2460
jl short @asc12012
cmp eax ,0x24fe
jg short @asc12012
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],1
jmp short @asco1159;leave the checks
@asc12012:
cmp eax,0x2200
jl short @asc12013
cmp eax ,0x22f0
jg short @asc12013
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],1
jmp short @asco1159;leave the checks
@asc12013:
cmp eax,0x2100
jl short @asc12014
cmp eax ,0x2130
jg short @asc12014
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],1
jmp short @asco1159;leave the checks
@asc12014:
cmp eax,0x2160
jl short @asc12015
cmp eax ,0x2199
jg short @asc12015
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],1
jmp short @asco1159;leave the checks
@asc12015:
;test some single characters
cmp eax,0xD
jne short @asc12016
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],2
jmp short @asco1159;leave the checks
@asc12016:
cmp eax,0xA
jne short @asc12017
inc edx; record the count of Unicode Chinese characters
add byte [ebp-4],2
jmp short @asco1159;leave the checks
@asc12017:
;Decision step of the Unicode scan.
;  - EDX == 0: go to @l1103.
;  - EDX == 2 and byte [EBP-4] == 3: set byte [EBP+3] to 1 and go to @l1252.
;  - AX == 0: if EDX is greater than 1 (signed), set byte [EBP] to 1 and store
;    EDX as a dword at [EBP-4]; either way go to @l1103.
;  - AX != 0: if EBX == EDX go to @l1104 (via @l1059); otherwise set byte
;    [EBP+1] to 1 and go to @l1103.
@asco1159:
;the Unicode value of the letter is obtained here
test edx,edx
jne short @l1204 ;if even the first one is not Unicode, we do not continue testing
jmp short @l1103
@l1204:
;ecx=0 means this character is not Unicode
;if ebx=1 at this point, we do not continue testing for Unicode
;ecx>0: tentatively treat it as Unicode
;if dh>dl occurs among n consecutive units, I treat this as Unicode
;NOTE(review): the next compare has no effect - its conditional jump is
;commented out and the following cmp overwrites the flags.
cmp byte [5fa8d1],0
;je short @kk941
cmp edx,2
jne short @k15150
cmp byte [ebp-4],3
jne short @kk913
;exclude the case of a Chinese character followed by a Unicode letter
mov byte [ebp+3],1;[ebp+3] is the garbled-text flag
jmp @l1252;exit the loop
@kk913:
;cmp byte [ebp-4],4;two consecutive Unicode letters are detected here
;jne short @k15160
;jmp @l1252
;@k15160:
@k15150:
or ax, ax;jump away when two zero bytes are encountered
jne short @l1233
cmp edx,1
jle short @l1103
;reaching here means the text is Unicode
mov byte [ebp],1;[ebp] is the Unicode flag
mov dword [ebp-4],edx;save the character count
jmp short @l1103
@l1233:
cmp ebx,edx
je short @l1059
;if ebx>dh, a non-Chinese character has appeared
mov byte [ebp+1],1;[ebp+1] is the non-Unicode-character flag
jmp short @l1103;exit the loop
@l1059:
jmp @l1104
;End of the Unicode pass. Reloads ESI from EDI; if byte [EBP] is 1 the routine
;continues at @l1252. Otherwise the state for the byte-oriented pass is reset:
;byte [EBP+1], EBX, EDX, ECX and byte [EBP-4] are all zeroed before the
;@l1259 loop starts.
@l1103:
;decide whether the text is Unicode based on [ebp]=1
mov esi,edi;reload the character address
cmp byte [ebp],1
je @l1252
mov byte [ebp+1],0;[ebp+1] is the non-Unicode-character flag
;the ASCII test starts here
xor ebx,ebx
xor edx,edx
xor ecx,ecx
mov byte [ebp-4],0;[5fa8c7]=0, character count
;Head of the byte-oriented scan loop. Loads one word from [ESI] into AX and
;counts it in EBX. On the first iteration a zero low byte ends the scan at
;@l1252.
;  - AL in 20..7F: counted as a single-byte character (EDX+1; byte [EBP-4]+1
;    while EDX is at most 2), then ESI is decremented by one before @l1303.
;  - otherwise AH in 40..FE and AL in 81..FE: counted as a double-byte
;    character (EDX+1; byte [EBP-4]+3 while EDX is at most 2).
;  - otherwise nothing is counted.
;All three cases continue at @l1303.
;Assumes the direction flag is clear so lods advances ESI by 2 - TODO confirm.
;Only CL is loaded before ECX is compared; the upper bits stay zero from the
;xor ecx,ecx executed before the loop.
@l1259:
lods word ptr [esi]
inc ebx ;character counter
nop
;first test whether the low byte is ASCII
mov cl,al
cmp ebx,1
jne @l1202
;if the first byte is 0, jump away
cmp al,0
je @l1252
@l1202:
cmp ecx,7f
jg short @asc1305
cmp ecx,20
jl short @asc1305
inc edx
cmp edx,2
jg short @k1514
inc byte [ebp-4]
@k1514:
dec esi ;step esi by one position (original note: "move esi forward by 1")
jmp @l1303
@asc1305:
;next test whether it is "ascii16" (a double-byte character)
mov cl,ah
cmp ecx,fe
jg short @asc1045
cmp ecx,40
jl short @asc1045
;check the low byte
mov cl,al
cmp ecx,fe
jg short @asc1045
cmp ecx,81
jl short @asc1045
inc edx
cmp edx ,2
jg short @k1512
add byte [ebp-4],3
@k1512:
@asc1045:
;Tail of one iteration of the byte-oriented scan.
;  - If [5fa8d1] is non-zero, EDX == 2 and byte [EBP-4] == 4, go to @l1252.
;    In the code before @l1303 a single-byte hit adds 1 and a double-byte hit
;    adds 3, so 4 presumably means one of each - TODO confirm.
;  - AL != 0: go to @l1301.
;  - AL == 0: if EDX is at most 1 go to @l1252; otherwise set byte [EBP] to 2,
;    go to @l1252 if EDX is above 64 (hex), then update the garbled-text flag
;    at [EBP+3] and execute popad.
@l1303:
;exclude some cases of a Chinese character followed by an ASCII letter
;NOTE(review): this is a dword compare, so it also reads [5fa8d2]..[5fa8d4];
;elsewhere [5fa8d1] and [5fa8d2] are tested as separate bytes - confirm that
;the wider compare is intended.
cmp dword [5fa8d1],0
je short @k1515
cmp edx,2
jne short @k1515
cmp byte [ebp-4],4
jne short @k1516
jmp short @l1252
@k1516:
@k1515:
or al, al;jump away when a zero terminator is met (original note says two zero bytes; only AL is tested)
jne short @l1301
cmp edx,1
jle short @l1252
;reaching here means the text is "ascii16"
mov byte [ebp],2;[ebp] is the ascii16 flag
;cmp dword [5fa8d1],0
cmp edx,64
jg short @l1252
;jne short @l1252
;add edx,edx
;inc edx
;NOTE(review): [ebp] was just set to 2 above, so the je below is never taken.
cmp byte [ebp],0
je short @l1211
cmp byte [ebp+1],0
je short @l1343
@l1211:
mov byte [ebp+3],1;5fa8ce garbled-text flag
@l1343:
cmp byte [5fa8d2],1;check added on 2014-12-05
jne short @mm1909
mov byte [ebp+3],1;5fa8ce garbled-text flag
@mm1909:
popad
;If the dword at [EBP-4] is zero, branch to @l1531 (the rest of this section
;lies beyond the visible fragment).
@l1336:
cmp dword [ebp-4],0;saved character count [5fa8c7]
je short @l1531