能力值:
( LV4,RANK:40 )
|
-
-
2 楼
为何放弃治疗?
友情提示:bp send 跟包 找上层加密函数 自己写解密
|
能力值:
( LV2,RANK:10 )
|
-
-
3 楼
�'`* �6y�/� �"小"冠"衣"热"公"虹"秋"野"遁"雾"火"猴"肉"帅"呜"桃"帮"侠"生"蜜"敢"鲨"八"莉(
不就是utf8么
|
能力值:
( LV3,RANK:20 )
|
-
-
4 楼
从什么转到UTF8? ansi Unicode转都是乱码
|
能力值:
( LV12,RANK:250 )
|
-
-
5 楼
这编码,明显是UTF8。
win7及以上,kernel自带Unicode转UTF8的API,名字忘记了,自求度娘。
win7以下,系统不自带转换函数,需要自己实现。不过好像其它高级语言有转换的库函数,也自求度娘。
给你一个片段:
static const int gk_utf8_max_byte = 6; // maximum number of bytes in one encoded sequence
#pragma warning(push)
#pragma warning(disable:4244) // C4244: conversion from 'const unsigned long' to 'unsigned char', possible loss of data
/*!
  Encodes one code point using the original (up to 6 byte, 31 bit) UTF-8 scheme.
  \param utf8     receives the encoded bytes (up to gk_utf8_max_byte of them);
                  may be NULL, in which case only the length is computed
  \param unicode  code point to encode, must be below 0x80000000
  \return number of bytes produced (1..6), or 0 when unicode is out of range
*/
int unicode_byte2utf8_byte(unsigned char* utf8,
                           const unsigned long unicode)
{
  unsigned char tu[gk_utf8_max_byte];
  if(utf8 == NULL) utf8 = tu;             // length-only query: write into scratch space

  // 1 byte: 0XXX XXXX (7 payload bits)
  if(unicode < 0x00000080UL)
  {
    utf8[0] = (unsigned char)unicode;
    return 1;
  }

  // Trailing bytes all have the form 10XX XXXX; "a" holds the lowest 6 bits.
  const unsigned char a = (unsigned char)(((unicode >>  0) & 0x3F) | 0x80);
  // 2 bytes: 110X XXXX + 1 trailing byte (11 payload bits)
  if(unicode < 0x00000800UL)
  {
    utf8[1] = a;
    utf8[0] = (unsigned char)(((unicode >> 6) & 0x1F) | 0xC0);
    return 2;
  }

  const unsigned char b = (unsigned char)(((unicode >>  6) & 0x3F) | 0x80);
  // 3 bytes: 1110 XXXX + 2 trailing bytes (16 payload bits)
  if(unicode < 0x00010000UL)
  {
    utf8[2] = a; utf8[1] = b;
    utf8[0] = (unsigned char)(((unicode >> 12) & 0x0F) | 0xE0);
    return 3;
  }

  const unsigned char c = (unsigned char)(((unicode >> 12) & 0x3F) | 0x80);
  // 4 bytes: 1111 0XXX + 3 trailing bytes (21 payload bits)
  if(unicode < 0x00200000UL)
  {
    utf8[3] = a; utf8[2] = b; utf8[1] = c;
    utf8[0] = (unsigned char)(((unicode >> 18) & 0x07) | 0xF0);
    return 4;
  }

  const unsigned char d = (unsigned char)(((unicode >> 18) & 0x3F) | 0x80);
  // 5 bytes: 1111 10XX + 4 trailing bytes (26 payload bits)
  if(unicode < 0x04000000UL)
  {
    utf8[4] = a; utf8[3] = b; utf8[2] = c; utf8[1] = d;
    utf8[0] = (unsigned char)(((unicode >> 24) & 0x03) | 0xF8);
    return 5;
  }

  const unsigned char e = (unsigned char)(((unicode >> 24) & 0x3F) | 0x80);
  // 6 bytes: 1111 110X + 5 trailing bytes (31 payload bits)
  if(unicode < 0x80000000UL)
  {
    utf8[5] = a; utf8[4] = b; utf8[3] = c; utf8[2] = d; utf8[1] = e;
    // The lead byte carries bit 30. The previous mask 0x04000000 shifted by 30
    // always produced 0, so that bit was silently dropped.
    utf8[0] = (unsigned char)(((unicode >> 30) & 0x01) | 0xFC);
    return 6;
  }
  return 0;
}
#pragma warning(pop)
|
能力值:
( LV3,RANK:20 )
|
-
-
6 楼
不对。Unicode转UTF8后,中文转换的结果是 \u5C0F 这个样子的~
|
能力值:
( LV12,RANK:250 )
|
-
-
7 楼
哥,代码都贴给你了,还不信。不信去问百度http://baike.baidu.com/view/25412.htm
看了UTF-8编码的定义就可以知道,常用汉字(码点在U+0800~U+FFFF范围内)的UTF-8编码都是3 byte,且首字节的高4位都是0xE(即首字节为0xE0~0xEF)。
\u5C0F(即U+5C0F)是“小”的Unicode编码,不是UTF-8编码;“小”的UTF-8编码是 E5 B0 8F。
|
能力值:
( LV3,RANK:20 )
|
-
-
8 楼
是的.我看过了.
我想把封包转换成中文.直接套用易语言拿模块测试了下Utf8到Unicode的代码.
可是转换依然未实现中文的效果. 显示全是乱码..
恕我愚钝..请指教.
|
能力值:
( LV12,RANK:250 )
|
-
-
9 楼
代码给你,有用的自己抠出来。到这地步你还不会,我也没办法了。
我看了网上在线转换UTF8的点,都很扯,转换完全不正确。
至于易语言,我不知道是不是它的实现不行,还是你的参数不正确。
建议去理解一下Unicode编码规范以及UTF-8编码规范。
另外,你贴出来的原始封包,并不是全是UTF-8,只有中间一段文字是UTF-8,不能全转换。
------------------------------------------------
ws_utf8.h
/*!
\file ws_utf8.h
\brief ws_utf8.h用于如UTF8与UNICODE相互转换
\section ws_utf8 ver 1.1.1305.2510 (For All)
- \b 2013-03-07 新增unicode与utf8编码的转换。\n
ascii与utf8的转换需要自行先从ascii转换为unicode。0.1
- \b 2013-03-08 发现WinXP的ntdll不提供UTF函数,故参考UTF8文档,重新实现。0.1~1.0
- \b 2013-03-09 扩展支持4 byte Unicode
- \b 2013-05-25 处理转换时多处理一个数据的BUG。1.0~1.1
\author triones
\date 2013-03-07
*/
#pragma once
#include "blks.h"
/*!
Converts one Unicode character into one UTF-8 character.
\param utf8 buffer receiving the UTF-8 result; may be NULL
\param unicode the Unicode character
\return number of bytes produced; 0 indicates failure
*/
int unicode_byte2utf8_byte(unsigned char* utf8,
const unsigned long unicode);
/*!
Converts one UTF-8 character into one Unicode character.\n
Note: illegal UTF-8 characters and incomplete UTF-8 characters are skipped automatically.
\param unicode buffer receiving the result; may be NULL
\param utf8 pointer to the UTF-8 character
\return number of UTF-8 bytes read; 0 indicates failure
*/
int utf8_byte2unicode_byte(unsigned long* unicode,
const unsigned char* const utf8);
//! Converts a UNICODE (wide) string to a UTF-8 string (version 1)
/*!
\param uft8 buffer that receives the UTF-8 result
\param max_uft8 maximum capacity of the UTF-8 result buffer (in bytes)
\param ws the UNICODE string to convert
\param ws_len length of the UNICODE string to convert (in wide characters)\n
When ws_len is left at its default of -1, ws is treated as a null-terminated string\n
Note: the caller must supply a sufficiently large conversion buffer. Also mind the parameter order
\return whether the conversion succeeded
\code
#include "ws_s.h"
char str[40];
if(!ws2utf8(str,sizeof(str),L"文字"))
{
cout<<"ws2utf8 conversion failed, LastError:"<<GetLastError();
}
\endcode
*/
bool ws2utf8(unsigned char* uft8,
const int max_uft8,
const wchar_t* ws,
const int ws_len = -1);
//! Converts a UNICODE (wide) string to a UTF-8 string (version 2)
/*!
\param ws the UNICODE string to convert
\return the corresponding UTF-8 string object after conversion
\code
#include "ws_s.h"
blks<unsigned char> s = ws2utf8(L"文字");
if(s.empty())
{
cout<<"ws2utf8 conversion failed, LastError:"<<GetLastError();
}
\endcode
*/
blks<unsigned char> ws2utf8(const wchar_t* ws);
//! Converts a UTF-8 string to a UNICODE (wide) string (version 1)
/*!
\param ws buffer that receives the UNICODE result
\param max_ws maximum capacity of the UNICODE result buffer (in wide characters)
\param utf8 the UTF-8 string to convert
\param utf8_len length of the UTF-8 string to convert (in bytes)\n
When utf8_len is left at its default of -1, utf8 is treated as a null-terminated string
\return whether the conversion succeeded\n
Note: the caller must supply a sufficiently large conversion buffer. Also mind the parameter order
*/
bool utf82ws(wchar_t* ws,
const int max_ws,
const unsigned char* utf8,
const int utf8_len = -1);
//! Converts a UTF-8 string to a UNICODE (wide) string (version 2)
/*!
\param utf8 the UTF-8 string to convert
\return the corresponding UNICODE string object after conversion
*/
blks<wchar_t> utf82ws(const unsigned char* utf8);
------------------------------------------------
ws_utf8.cpp
#include "ws_utf8.h"
static const int gk_utf8_max_byte = 6; // maximum number of bytes in one encoded sequence
#pragma warning(push)
#pragma warning(disable:4244) // C4244: conversion from 'const unsigned long' to 'unsigned char', possible loss of data
/*!
  Encodes one code point using the original (up to 6 byte, 31 bit) UTF-8 scheme.
  \param utf8     receives the encoded bytes (up to gk_utf8_max_byte of them);
                  may be NULL, in which case only the length is computed
  \param unicode  code point to encode, must be below 0x80000000
  \return number of bytes produced (1..6), or 0 when unicode is out of range
*/
int unicode_byte2utf8_byte(unsigned char* utf8,
                           const unsigned long unicode)
{
  unsigned char tu[gk_utf8_max_byte];
  if(utf8 == NULL) utf8 = tu;             // length-only query: write into scratch space

  // 1 byte: 0XXX XXXX (7 payload bits)
  if(unicode < 0x00000080UL)
  {
    utf8[0] = (unsigned char)unicode;
    return 1;
  }

  // Trailing bytes all have the form 10XX XXXX; "a" holds the lowest 6 bits.
  const unsigned char a = (unsigned char)(((unicode >>  0) & 0x3F) | 0x80);
  // 2 bytes: 110X XXXX + 1 trailing byte (11 payload bits)
  if(unicode < 0x00000800UL)
  {
    utf8[1] = a;
    utf8[0] = (unsigned char)(((unicode >> 6) & 0x1F) | 0xC0);
    return 2;
  }

  const unsigned char b = (unsigned char)(((unicode >>  6) & 0x3F) | 0x80);
  // 3 bytes: 1110 XXXX + 2 trailing bytes (16 payload bits)
  if(unicode < 0x00010000UL)
  {
    utf8[2] = a; utf8[1] = b;
    utf8[0] = (unsigned char)(((unicode >> 12) & 0x0F) | 0xE0);
    return 3;
  }

  const unsigned char c = (unsigned char)(((unicode >> 12) & 0x3F) | 0x80);
  // 4 bytes: 1111 0XXX + 3 trailing bytes (21 payload bits)
  if(unicode < 0x00200000UL)
  {
    utf8[3] = a; utf8[2] = b; utf8[1] = c;
    utf8[0] = (unsigned char)(((unicode >> 18) & 0x07) | 0xF0);
    return 4;
  }

  const unsigned char d = (unsigned char)(((unicode >> 18) & 0x3F) | 0x80);
  // 5 bytes: 1111 10XX + 4 trailing bytes (26 payload bits)
  if(unicode < 0x04000000UL)
  {
    utf8[4] = a; utf8[3] = b; utf8[2] = c; utf8[1] = d;
    utf8[0] = (unsigned char)(((unicode >> 24) & 0x03) | 0xF8);
    return 5;
  }

  const unsigned char e = (unsigned char)(((unicode >> 24) & 0x3F) | 0x80);
  // 6 bytes: 1111 110X + 5 trailing bytes (31 payload bits)
  if(unicode < 0x80000000UL)
  {
    utf8[5] = a; utf8[4] = b; utf8[3] = c; utf8[2] = d; utf8[1] = e;
    // The lead byte carries bit 30. The previous mask 0x04000000 shifted by 30
    // always produced 0, so that bit was silently dropped.
    utf8[0] = (unsigned char)(((unicode >> 30) & 0x01) | 0xFC);
    return 6;
  }
  return 0;
}
#pragma warning(pop)
/*!
  Decodes the next character from a null-terminated UTF-8 byte sequence
  (original scheme, 1 to 6 bytes per character).

  Bytes that cannot start a character (stray trailing bytes 0x80..0xBF and
  0xFE/0xFF) and sequences cut short by a byte that is not of the form
  10XX XXXX are skipped; decoding then resumes at the offending byte. Because a
  NUL byte always decodes as a character, scanning never runs past the
  terminator.

  \param unicode  receives the decoded code point; may be NULL
  \param utf8     pointer to the first byte to examine
  \return total number of bytes consumed, including any skipped bytes;
          0 only when utf8 is NULL
*/
int utf8_byte2unicode_byte(unsigned long* unicode,
                           const unsigned char* const utf8)
{
  if(utf8 == NULL) return 0;
  unsigned long discard;
  if(unicode == NULL) unicode = &discard;

  int pos = 0;
  for(;;)
  {
    const unsigned char lead = utf8[pos];

    // Single byte (ASCII, including the terminating NUL).
    if(lead < 0x80)
    {
      *unicode = lead;
      // Report skipped bytes as consumed so the caller does not decode the
      // same character a second time.
      return pos + 1;
    }

    // Not a valid lead byte: stray trailing byte, or 0xFE/0xFF.
    // (Lead bytes 0xFC..0xFF used to spin forever here.)
    if(lead < 0xC0 || lead > 0xFD)
    {
      ++pos;
      continue;
    }

    int total;
    unsigned long value;
    if(lead < 0xE0)      { total = 2; value = lead & 0x1F; }
    else if(lead < 0xF0) { total = 3; value = lead & 0x0F; }
    else if(lead < 0xF8) { total = 4; value = lead & 0x07; }
    else if(lead < 0xFC) { total = 5; value = lead & 0x03; }
    else                 { total = 6; value = lead & 0x01; }

    int got = 1;
    for(; got < total; ++got)
    {
      const unsigned char next = utf8[pos + got];
      // A trailing byte must be 10XX XXXX. ASCII (notably NUL) or a new lead
      // byte ends the sequence early and is NOT consumed.
      if((next & 0xC0) != 0x80) break;
      value = (value << 6) | (unsigned long)(next & 0x3F);
    }
    if(got == total)
    {
      *unicode = value;
      return pos + total;
    }

    // Incomplete sequence: drop what was read and retry from the byte that
    // broke it.
    pos += got;
  }
}
/*!
  Converts a wide string to UTF-8 into a caller-supplied buffer.
  Every wchar_t is encoded on its own through unicode_byte2utf8_byte, and the
  output is always terminated with a single 0 byte on success.

  \param uft8      output buffer
  \param max_uft8  capacity of the output buffer in bytes (terminator included)
  \param ws        wide string to convert
  \param ws_len    number of wide characters to convert; a negative value
                   means ws is null-terminated
  \return true on success; false on invalid arguments (NULL pointers,
          max_uft8 <= 1, ws_len == 0), when a character cannot be encoded,
          or when the result plus terminator does not fit in max_uft8 bytes
*/
bool ws2utf8(unsigned char* uft8,
             const int max_uft8,
             const wchar_t * ws,
             const int ws_len)
{
  if((uft8 == NULL) || (max_uft8 <= 1) || (ws == NULL) || (ws_len == 0)) return false;

  int wlen = ws_len;
  if(ws_len < 0)
  {
    for(wlen = 0; ws[wlen] != L'\0'; ++wlen);
  }

  int lp = 0;
  for(int i = 0; i < wlen; ++i)
  {
    // Encode into scratch space first: the encoder may emit up to 6 bytes and
    // must never write past the end of the caller's buffer.
    unsigned char seq[6];
    const int k = unicode_byte2utf8_byte(seq, ws[i]);
    if(k == 0) return false;
    // Always keep one byte in reserve for the terminator.
    if(k >= max_uft8 - lp) return false;
    for(int j = 0; j < k; ++j) uft8[lp + j] = seq[j];
    lp += k;
  }
  // Terminate explicitly rather than converting ws[wlen], which lies outside
  // the range the caller asked for when ws_len is given.
  uft8[lp] = 0;
  return true;
}
/*!
  Converts a null-terminated wide string to a UTF-8 byte block.
  The terminating null character is converted and stored as well.
  \param ws  wide string to convert
  \return the UTF-8 bytes; an empty block when ws is NULL, and a cleared block
          when a character could not be encoded
*/
blks<unsigned char> ws2utf8(const wchar_t* ws)
{
  blks<unsigned char> result;
  if(ws == NULL) return result;

  unsigned char encoded[gk_utf8_max_byte];
  const wchar_t* cur = ws;
  for(;;)
  {
    const wchar_t ch = *cur;
    const int produced = unicode_byte2utf8_byte(encoded, ch);
    if(produced == 0)
    {
      // Unencodable character: discard everything gathered so far.
      result.clear();
      break;
    }
    result.put(encoded, produced);
    // The terminator has just been stored; nothing more to convert.
    if(ch == L'\0') break;
    ++cur;
  }
  return result;
}
/*!
  Converts a UTF-8 string to a wide string in a caller-supplied buffer.
  Each decoded code point is stored with a plain cast to wchar_t, and the
  output is always terminated with L'\0' on success.

  \param ws        output buffer
  \param max_ws    capacity of the output buffer in wide characters
                   (terminator included)
  \param utf8      UTF-8 bytes to convert
  \param utf8_len  number of bytes to convert; a negative value means utf8 is
                   null-terminated
  \return true on success; false on invalid arguments (NULL pointers,
          max_ws <= 1, utf8_len == 0), when decoding fails, or when the result
          plus terminator does not fit in max_ws wide characters
*/
bool utf82ws(wchar_t* ws,
             const int max_ws,
             const unsigned char* utf8,
             const int utf8_len)
{
  if((ws == NULL) || (max_ws <= 1) || (utf8 == NULL) || (utf8_len == 0)) return false;

  int ulen = utf8_len;
  if(utf8_len < 0)
  {
    for(ulen = 0; utf8[ulen] != '\0'; ++ulen);
  }

  int lp = 0;
  for(int i = 0; i < ulen;)
  {
    unsigned long ch;
    const int k = utf8_byte2unicode_byte(&ch, &utf8[i]);
    if(k == 0) return false;
    // The decoder has no length limit of its own; ignore a character that was
    // assembled from bytes beyond the requested range.
    if(k > ulen - i) break;
    // Check BEFORE storing, and keep one slot in reserve for the terminator.
    if(lp >= max_ws - 1) return false;
    ws[lp] = (wchar_t)ch;
    ++lp;
    i += k;
  }
  ws[lp] = L'\0';
  return true;
}
/*!
  Converts a null-terminated UTF-8 string to a wide-character block.
  The terminating null byte is decoded and appended as well.
  \param utf8  UTF-8 bytes to convert
  \return the wide characters; an empty block when utf8 is NULL, and a cleared
          block when decoding reports failure
*/
blks<wchar_t> utf82ws(const unsigned char* utf8)
{
  blks<wchar_t> out;
  if(utf8 == NULL) return out;

  // Locate the position one past the terminator so that the terminator itself
  // is part of the converted range.
  const unsigned char* stop = utf8;
  while(*stop != '\0') ++stop;
  ++stop;

  const unsigned char* cur = utf8;
  while(cur < stop)
  {
    unsigned long code;
    const int used = utf8_byte2unicode_byte(&code, cur);
    if(used == 0)
    {
      out.clear();
      return out;
    }
    out << (wchar_t)code;
    cur += used;
  }
  return out;
}
|
能力值:
( LV3,RANK:20 )
|
-
-
10 楼
非常感谢.我在尝试解决
|
|
|