因工作需要监控网络各层发送和接收的数据包。
其中在做http数据包监控时碰到gzip压缩格式,在网友发布的一些技术文章基础上,经过一段时间的研究、调试,终于解析成功。现将核心代码公布于此,希望能够和大家一起共同学习交流。
注:以下代码需要依赖zlib开源库,可以到网上搜索下载。
/* HTTP gzip decompress */
int CNNHttp::httpgzdecompress(const PVOID zdata, DWORD nzdata,
PVOID data, DWORD *ndata)
{
int err = 0;
z_stream d_stream = {0}; /* decompression stream */
static char dummy_head[2] =
{
0x8 + 0x7 * 0x10,
(((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
};
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = (Bytef *)zdata;
d_stream.avail_in = 0;
d_stream.next_out = (Bytef *)data;
if(inflateInit2(&d_stream, 47) != Z_OK) return -1;
while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
if(err != Z_OK )
{
if(err == Z_DATA_ERROR)
{
d_stream.next_in = (Bytef*) dummy_head;
d_stream.avail_in = sizeof(dummy_head);
if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK)
{
return -1;
}
}
else return -1;
}
}
if(inflateEnd(&d_stream) != Z_OK) return -1;
*ndata = d_stream.total_out;
return 0;
}
----------------------------------------------------------------------------------
class CBuffer
{
std::vector<BYTE> vecBuf;
public:
void Reset(DWORD dwNewSize = 0)
{
vecBuf.resize(dwNewSize);
}
void Reset(PVOID pData, DWORD nLen)
{
vecBuf.clear();
Append(pData, nLen);
}
bool IsEmpty() const
{
return vecBuf.empty();
}
void Append(PVOID pData, DWORD nLen)
{
vecBuf.insert(vecBuf.end(), (PBYTE)pData, (PBYTE)pData + nLen);
}
PBYTE GetData()
{
if (vecBuf.empty())
{
return NULL;
}
return &vecBuf[0];
}
DWORD GetDataLength()
{
return vecBuf.size();
}
BYTE& operator[](DWORD _Pos)
{
return vecBuf[_Pos];
}
};
// Collects received HTTP body bytes and removes "Transfer-Encoding: chunked"
// framing.
//
//   cBuf        receives the payload bytes (chunk framing removed).
//   cBufLeft    carries an incomplete chunk over to the next call.
//   cBufTmp     scratch buffer; its contents are overwritten whenever a
//               partial chunk has to be carried over.
//   bIsChunked  when false the bytes are appended to cBuf unchanged.
//   szGzipData  newly received bytes; they do not have to be NUL-terminated.
//   nLen        number of bytes at szGzipData.
//
// Returns true when all chunk data of the current response has been received,
// i.e. a complete chunk-size line with value 0 was read. A size line that does
// not start with a hex digit, or whose value does not fit in an int, is also
// treated as the end of the data. Returns false otherwise, and always for
// non-chunked data.
bool OnRecvGzipData(CBuffer& cBuf, CBuffer& cBufLeft, CBuffer& cBufTmp, bool bIsChunked, char *szGzipData, int nLen)
{
    if (!bIsChunked)
    {
        cBuf.Append((PBYTE)szGzipData, nLen);
        return false;
    }

    // Parse from the carry-over buffer so a chunk split across packets is
    // seen as one contiguous range.
    cBufLeft.Append(szGzipData, nLen);
    szGzipData = (char*)cBufLeft.GetData();
    nLen = (int)cBufLeft.GetDataLength();

    const int nCrLfLen = 2;
    while (nLen > 0)
    {
        // Find the CRLF ending the chunk-size line. The scan is bounded by
        // nLen because the buffer carries no terminating NUL.
        int nLineLen = -1;
        for (int i = 0; i + 1 < nLen; ++i)
        {
            if (('\r' == szGzipData[i]) && ('\n' == szGzipData[i + 1]))
            {
                nLineLen = i;
                break;
            }
        }
        if (nLineLen < 0)
        {
            break; // size line still incomplete: wait for more data
        }

        // Parse the leading hex number; anything after it on the line
        // (chunk extensions, padding) is ignored.
        int nPos = 0;
        while ((nPos < nLineLen) && ((' ' == szGzipData[nPos]) || ('\t' == szGzipData[nPos])))
        {
            ++nPos;
        }
        unsigned long ulChunkSize = 0;
        bool bTooLarge = false;
        for (; nPos < nLineLen; ++nPos)
        {
            const char ch = szGzipData[nPos];
            unsigned long ulDigit = 0;
            if ((ch >= '0') && (ch <= '9'))
            {
                ulDigit = (unsigned long)(ch - '0');
            }
            else if ((ch >= 'a') && (ch <= 'f'))
            {
                ulDigit = (unsigned long)(ch - 'a') + 10;
            }
            else if ((ch >= 'A') && (ch <= 'F'))
            {
                ulDigit = (unsigned long)(ch - 'A') + 10;
            }
            else
            {
                break;
            }
            if (ulChunkSize > (0x7FFFFFFFul >> 4))
            {
                bTooLarge = true; // one more digit would not fit in an int
                break;
            }
            ulChunkSize = (ulChunkSize << 4) | ulDigit;
        }
        if (bTooLarge || (0 == ulChunkSize))
        {
            return true;
        }

        const int nChunkSize = (int)ulChunkSize;
        const int nHeaderLen = nLineLen + nCrLfLen;
        const int nAvail = nLen - nHeaderLen; // bytes following the size line
        // The chunk is usable once its data and trailing CRLF have arrived.
        // Written as a subtraction so the comparison cannot overflow.
        if ((nAvail < nCrLfLen) || (nChunkSize > nAvail - nCrLfLen))
        {
            break;
        }

        cBuf.Append((PBYTE)(szGzipData + nHeaderLen), (DWORD)nChunkSize);
        const int nConsumed = nHeaderLen + nChunkSize + nCrLfLen;
        szGzipData += nConsumed;
        nLen -= nConsumed;
    }

    if (nLen > 0)
    {
        // szGzipData points into cBufLeft, so go through the scratch buffer
        // instead of copying the buffer onto itself.
        cBufTmp.Reset(szGzipData, (DWORD)nLen);
        cBufLeft.Reset(cBufTmp.GetData(), cBufTmp.GetDataLength());
    }
    else
    {
        cBufLeft.Reset();
    }
    return false;
}
void OnRecvData(HANDLE hand, int iRet, char* buf)
{
//NNLOG_TRACE_FUN();
class CGzipDataPackBuf
{
public:
CBuffer vecByteGzipDataBuf;
CBuffer vecByteGzipDataDecodeBuf;
CBuffer vecByteBufLeft;
bool begin_gzip_text_html;
bool bIsChunked;
bool bIsUtf8;
DWORD dwGetTickCount;
CGzipDataPackBuf()
{
Reset();
}
void Reset()
{
dwGetTickCount = ::GetTickCount();
begin_gzip_text_html = false;
bIsChunked = false;
bIsUtf8 = false;
}
};
typedef std::map<HANDLE, CGzipDataPackBuf> MapCGzipDataPackBuf_T;
static MapCGzipDataPackBuf_T s_MapCGzipDataPackBuf;
static CCriticalSection s_csMapCGzipDataPackBuf;
if (!buf || (0 >= iRet))
{
return;
}
NNLOG_DEBUG(_T("len:%u, data:%S"), iRet, buf);
//CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), buf, (DWORD)iRet);
const DWORD MAX_GzipDataBuf = 1024*1024;
/*
Content-Type: text/html; charset=utf-8
Content-Language: zh-CN
content="text/html;charset=gb2312"
Content-Type: text/html;charset=gbk
*/ char *tstr = NULL;
bool bRecvChunkGzipDataComplete = false;
CGzipDataPackBuf* pCGzipDataPackBuf = NULL;
if ((15 <= iRet)
&& (0 == StrCmpNIA(buf, "HTTP/1.1 200 OK", 15))
&& StrStrIA(buf, "Content-Type: text/html")
&& StrStrIA(buf, "Content-Encoding: gzip")
&& strstr(buf, "\r\n\r\n")//此处未考虑http头信息分包接收的情况
)
{
//NNLOG_TRACE_ACTION_SCOPE(HTTP_200_OK);
NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
pCGzipDataPackBuf = &s_MapCGzipDataPackBuf[hand];
}
else
{
//NNLOG_TRACE_ACTION_SCOPE(Find pCGzipDataPackBuf);
NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
MapCGzipDataPackBuf_T::iterator it = s_MapCGzipDataPackBuf.find(hand);
if (s_MapCGzipDataPackBuf.end() == it)
{
return;
}
pCGzipDataPackBuf = &it->second;
}
char* pos = NULL;
if (!pCGzipDataPackBuf->begin_gzip_text_html
&& (pos = strstr(buf, "\r\n\r\n"))
)
{
//NNLOG_TRACE_ACTION_SCOPE(check http data);
pos[0] = 0;
if (!StrStrIA(buf, "Content-Type: text/html")
|| !StrStrIA(buf, "Content-Encoding: gzip"))
{
//此处未考虑http头信息分包接收的情况
NNLOG_ASSERT(0);//上面已作初步判断,这里一般不太可能发生
WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
s_MapCGzipDataPackBuf.erase(hand);
return;
}
pCGzipDataPackBuf->begin_gzip_text_html = true;
pCGzipDataPackBuf->bIsUtf8 = NULL != StrStrIA(buf, "charset=utf-8");
pCGzipDataPackBuf->bIsChunked = NULL != StrStrIA(buf, "Transfer-Encoding: chunked");
pos[0] = '\r';//还原
pos += strlen("\r\n\r\n");
iRet -= (pos - buf);
buf = pos;
bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet);
//if (pCGzipDataPackBuf->bIsChunked)
//{
// CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet);
//}
}
else if (pCGzipDataPackBuf->begin_gzip_text_html)
{
//NNLOG_TRACE_ACTION_SCOPE(append gzip data);
bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet);
//if (pCGzipDataPackBuf->bIsChunked)
//{
// CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet);
//}
}
if (!pCGzipDataPackBuf->vecByteGzipDataBuf.IsEmpty()
&& (!pCGzipDataPackBuf->bIsChunked || bRecvChunkGzipDataComplete)
|| (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength()) )
{
//NNLOG_TRACE_ACTION_SCOPE(try parse gzip);
DWORD Length = MAX_GzipDataBuf*2;
pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.Reset(Length);
--Length;
int iRetDec = CNNHttp::httpgzdecompress(pCGzipDataPackBuf->vecByteGzipDataBuf.GetData(), pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength(), pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), &Length);
if (0 == iRetDec)
{
//<input type=hidden name=tn value="77071064_1_pg">
pCGzipDataPackBuf->vecByteGzipDataDecodeBuf[Length] = '\0';
CString gzipData;
if (pCGzipDataPackBuf->bIsUtf8)
{
gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_UTF8);
}
else
{
gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_ACP);
//NNLOG_DEBUG(_T("gzip len:%u, data:%S"), Length, vecByteGzipDataDecodeBuf.GetData());
}
if (!gzipData.IsEmpty())
{
if (mc.GetdwHttpGzipPackMaxShowLen() < (DWORD)gzipData.GetLength())
{
gzipData = gzipData.Left(mc.GetdwHttpGzipPackMaxShowLen() / 2) + _T("...") + gzipData.Right(mc.GetdwHttpGzipPackMaxShowLen() / 2);
}
NNLOG_DEBUG(_T("gzip len:%u, data:%s"), gzipData.GetLength(), gzipData.GetString());
}
//pCGzipDataPackBuf->vecByteGzipDataBuf.Reset();
}
if (((0 == iRetDec) && !pCGzipDataPackBuf->bIsChunked) || bRecvChunkGzipDataComplete || (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength()))
{
WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
s_MapCGzipDataPackBuf.erase(hand);
}
}
{
DWORD dwGetTickCount = ::GetTickCount();
WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
NNLOG_DEBUG(_T("s_MapCGzipDataPackBuf.size():%u"), s_MapCGzipDataPackBuf.size());
BOOST_FOREACH(MapCGzipDataPackBuf_T::value_type& v, s_MapCGzipDataPackBuf)
{
CGzipDataPackBuf& gdpb(v.second);
if (dwGetTickCount - gdpb.dwGetTickCount > 1000 * 60)
{
s_MapCGzipDataPackBuf.erase(v.first);
break;//下次再处理其它的
}
}
}
} 在各层hook临控的代码中调用上面函数:
// Hook for recv(): calls recv_ (presumably the original function; confirm
// against the hook macros) and passes the received bytes to OnRecvData.
// Returns the value of recv_ unchanged.
DEFINE_MY_WINAPI_RET(int, recv)(
    IN SOCKET s,
    __out_bcount_part(len, return) __out_data_source(NETWORK) char FAR * buf,
    IN int len,
    IN int flags
    )
{
    LOG_TRACE_FUN();
    int iRet = recv_(s, buf, len, flags);
    // A MSG_PEEK read leaves the data in the socket's queue, so the same bytes
    // are returned again by the next real read. Feeding both into OnRecvData
    // would duplicate them in the reassembled body.
    if (0 == (flags & MSG_PEEK))
    {
        OnRecvData((HANDLE)s, iRet, buf);
    }
    return iRet;
}
// Hook for WSARecv(): calls WSARecv_ (presumably the original function;
// confirm against the hook macros) and passes the received bytes to
// OnRecvData. Returns the value of WSARecv_ unchanged.
//
// Only receives that complete immediately (return value 0) and report their
// byte count through lpNumberOfBytesRecvd are inspected. Overlapped operations
// that complete later are not.
DEFINE_MY_WINAPI_RET(int, WSARecv)(
    IN SOCKET s,
    __in_ecount(dwBufferCount) __out_data_source(NETWORK) LPWSABUF lpBuffers,
    IN DWORD dwBufferCount,
    __out_opt LPDWORD lpNumberOfBytesRecvd,
    IN OUT LPDWORD lpFlags,
    __in_opt LPWSAOVERLAPPED lpOverlapped,
    __in_opt LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine
    )
{
    // lpFlags is an in/out parameter, so read the caller's flags before the
    // call. A MSG_PEEK read returns data that the next real read returns
    // again, and must not be recorded twice.
    const bool bPeekOnly = (NULL != lpFlags) && (0 != (*lpFlags & MSG_PEEK));

    int iRet = WSARecv_(s, lpBuffers, dwBufferCount, lpNumberOfBytesRecvd, lpFlags, lpOverlapped, lpCompletionRoutine);

    // Without lpNumberOfBytesRecvd the number of received bytes is unknown;
    // the buffer capacities say nothing about how much data is valid.
    if ((0 == iRet) && !bPeekOnly && lpBuffers && lpNumberOfBytesRecvd && (0 != *lpNumberOfBytesRecvd))
    {
        LOG_TRACE_FUN();
        // Hand out the received byte count over the buffers in array order,
        // never passing more than a buffer's own capacity.
        DWORD dwRemaining = *lpNumberOfBytesRecvd;
        for (DWORD i = 0; (i < dwBufferCount) && (0 < dwRemaining); ++i)
        {
            const DWORD dwInThisBuf = (dwRemaining < lpBuffers[i].len) ? dwRemaining : (DWORD)lpBuffers[i].len;
            OnRecvData((HANDLE)s, (int)dwInThisBuf, lpBuffers[i].buf);
            dwRemaining -= dwInThisBuf;
        }
    }
    return iRet;
}
// Hook for NtDeviceIoControlFile(): calls NtDeviceIoControlFile_ (presumably
// the original function; confirm against the hook macros) and, for AFD_RECV
// requests that have completed, passes the received bytes to OnRecvData.
// Returns the status of NtDeviceIoControlFile_ unchanged.
//
// NOTE(review): only the first entry of AfdInfo->BufferArray is inspected, and
// the byte count comes from IoStatusBlock->Information for the whole request.
// Confirm that callers of interest use a single buffer, or clamp the count to
// that buffer's size.
NTSTATUS
MYNTAPI(NtDeviceIoControlFile)(HANDLE FileHandle,
    HANDLE Event,
    PIO_APC_ROUTINE ApcRoutine,
    PVOID ApcContext,
    PIO_STATUS_BLOCK IoStatusBlock,
    ULONG IoControlCode,
    PVOID InputBuffer,
    ULONG InputBufferLength,
    PVOID OutputBuffer,
    ULONG OutputBufferLength
    )
{
    // Value of STATUS_PENDING, spelled out so that no additional header is
    // required.
    const NTSTATUS kStatusPending = (NTSTATUS)0x00000103L;

    // Remember the caller's buffer array before forwarding the request.
    PAFD_WSABUF lpBuffers = NULL;
    PAFD_INFO AfdInfo = (PAFD_INFO)InputBuffer;
    if (((AFD_RECV == IoControlCode) || (IoControlCode == AFD_SEND)) && AfdInfo && AfdInfo->BufferArray)
    {
        lpBuffers = AfdInfo->BufferArray;
    }

    NTSTATUS st = NtDeviceIoControlFile_(FileHandle,
        Event,
        ApcRoutine,
        ApcContext,
        IoStatusBlock,
        IoControlCode,
        InputBuffer,
        InputBufferLength,
        OutputBuffer,
        OutputBufferLength);

    if (AFD_RECV == IoControlCode)
    {
        // NT_SUCCESS() is also true for STATUS_PENDING, but a pending request
        // has not filled in IoStatusBlock or the data buffer yet, so it must
        // be skipped.
        if (NT_SUCCESS(st) && (kStatusPending != st)
            && IoStatusBlock
            && (0 < IoStatusBlock->Information) && (IoStatusBlock->Information <= 0x7FFFFFFF)
            && lpBuffers && lpBuffers->buf)
        {
            LOG_TRACE_FUN();
            OnRecvData(FileHandle, (int)IoStatusBlock->Information, lpBuffers->buf);
        }
    }
    return st;
}
[招生]科锐逆向工程师培训(2024年11月15日实地,远程教学同时开班, 第51期)