首页
社区
课程
招聘
[原创]复合文档格式分析
发表于: 2009-12-27 01:25 11874

[原创]复合文档格式分析

2009-12-27 01:25
11874
参加了看雪10周年活动,见了很多牛牛。但苦于近来工作忙,经常潜水,不能白拿kanxue的u盘,也来回报一下,重新活动活动。
如今,复合文档格式被广泛使用,虽然格式早就被人研究明白,也有相应的文档。
但网上大多都是些文字介绍,因此,想写个格式分析的东西来加深对复合文档的理解,
此小程序有如下功能:
1:打印输出文档头 512字节
2:分析msat,sat,ssat(short sat)
3:分析各个stream 使用的sat 和ssat的扇区号,
可在windows 和linux 下使用,更多功能我会逐步扩充好加上来。。。
写此程序的目的只是为了巩固下对复合文档的学习,具体的格式就不写了
,附件里是复合文档格式,英文好的看,

有用得着的朋友注意下:由于只是做了解析,没有建立红黑树,也没有考虑msat大于109个扇区的情况,需要的可以在此基础上扩充,应该不难。。。
#include <iostream>
#include <fstream>

#include <math.h>
#include <tchar.h>
#include <cassert>
#include <string>
#include <iomanip>
#include <vector>
#include <list>

#include "ComDoc.h"

using namespace std;

/* File-wide state (mutable, despite the original "const" label) */

 /* Sector size in bytes; written by DumpDocHeader from the header's _uSectorShift. */
 int	SectorSize = 0;
 /* Short-sector size in bytes; written by DumpDocHeader from the header's _uMiniSectorShift. */
 int	miniSectorSize = 0;
 ULONG  MaxMiniStreamSize =0; /** Size cutoff for short streams; main copies it from the header's _ulMiniSectorCutoff. ProcessDirEntry follows the short-sector table (SSAT) for non-root entries smaller than this, and the regular table otherwise. */
/* Forward declarations of the functions defined after main */
bool DumpDocHeader(PDocHeader pHeader);
bool IfReadFile(ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size);
int GetOffestFremSid(SECT sid);
bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& FatOfDirEntry,int );

int main(int argc,char *argv[])
{
	cout<<"Enter the name of the input file :\n";
	string	 inputFileName;
	getline(cin,inputFileName);
	
	BYTE * lpHeaderBuf  = new BYTE[512];

	memset(lpHeaderBuf,0,512);

	ifstream	inStream;
	inStream.open(inputFileName.data(),ios::binary|ios::in);
	assert(inStream.is_open());
	inStream.read((char*)lpHeaderBuf,512);
	PDocHeader   pHeaderSec = (PDocHeader)lpHeaderBuf;
	
	cout<<"open the "<<inputFileName<<" file is successful\n "<<endl;
	DumpDocHeader(pHeaderSec);
	MaxMiniStreamSize = pHeaderSec->_ulMiniSectorCutoff;
	/**********************************************************************/
	/* 处理msat                                                                      */
	/************************************************************************/
	unsigned long  iMastSize = 0;
	vector<int> vMastList;  /** 用于存储sat链表*/
	if ((pHeaderSec->_sectDifStart == ENDOFCHAIN)&&(pHeaderSec->_csectDif == 0))
	{
		for(iMastSize = 1;iMastSize <= 109 ;iMastSize ++)
		{
			if ((( pHeaderSec->_sectFat[iMastSize-1] ) ==  0xFFFFFFFF))
			{
				break;
			}
			else
			{
				
				BYTE  *SecBuf = new BYTE[SectorSize];
				IfReadFile(inStream,SecBuf,GetOffestFremSid((pHeaderSec->_sectFat[iMastSize - 1] )),SectorSize);
				int * pListOfMast = (int *)SecBuf;
				int i = 0;
				while( pListOfMast[i]!= FREESECT )
				{
					vMastList.push_back(pListOfMast[i]);
					//cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';
					
					cout<<"MAST["<<i<<"] == ";
					cout<<vMastList[i]<<'\t';
					if (vMastList[vMastList.size()-1] == ENDOFCHAIN)
					{
						cout<<endl;
					}
					i++;
				}
				
				delete []SecBuf;
			}
		
		}
	}
	else
	{
		/** Mast 大于 109个扇区的情况 */
	}

	/************************************************************************/
	/* 处理ssat                                                                     */
	/************************************************************************/
	
	/*
	 * 读取存取ssat短链分配表占用的扇区链,
	 */
	vector<int>	vSsatFat;
	vSsatFat.push_back(pHeaderSec->_sectMiniFatStart);
	int index = vSsatFat[0];
	while(vMastList[index] != ENDOFCHAIN )
	{
		index = vMastList[index];
		vSsatFat.push_back(index);

	}
	
	vSsatFat.push_back(vMastList[index]);
	int i = 0; /** 循环计数器*/
	for (i; i < vSsatFat.size();i++)
	{
		cout<<"S-FAT["<<i<<"] == ";
		cout<<hex<<vSsatFat[i]<<'\t';
	}
	cout<<endl;

	/*
	 * 读取ssat链,即用于记录short stream 的链
	 * pHeaderSec->_csectMiniFat 指出short stream 占几个扇区
	 */
	vector<int> vSsatList;

	for (i = 1; i <= pHeaderSec->_csectMiniFat; i++)
	{
		
			BYTE  *SecBuf = new BYTE[SectorSize];
			IfReadFile(inStream,SecBuf,GetOffestFremSid(vSsatFat[i-1]),SectorSize);
			int * pListOfMast = (int *)SecBuf;
			int i = 0;
			while( pListOfMast[i]!= FREESECT )
			{
				vSsatList.push_back(pListOfMast[i]);
				//cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';
				
				cout<<"SSAT["<<i<<"] == ";
				cout<<vSsatList[i]<<'\t';
				if (vSsatList[vSsatList.size()-1] == ENDOFCHAIN)
				{
					cout<<endl;
				}
				i++;
			}
			
			delete []SecBuf;
		
	}
	
	/************************************************************************/
	/* Process Directory                                                                     */
	/************************************************************************/
	
	/*
	 * 读取存取directory 的扇区sid链 
	 */
	vector<int>	vDirFat;
	vDirFat.push_back(pHeaderSec->_sectDirStart);
	index = vDirFat[0];
	while(vMastList[index] != ENDOFCHAIN )
	{
		index = vMastList[index];
		vDirFat.push_back(index);
		
	}
	
	vDirFat.push_back(vMastList[index]);

	for (i =0; i < vDirFat.size();i++)
	{
		cout<<"Director-FAT["<<i<<"] == ";
		cout<<hex<<vDirFat[i]<<'\t';
	}
	cout<<endl;
	cout<<vDirFat.size()<<endl;

	/*
	 *分析并处理directory目录
	 */
	vector<DirectoryEntry> lDirList;
	for (i=0; i<(vDirFat.size()-1) ;i++)
	{
		BYTE  *SecBuf = new BYTE[SectorSize];
		IfReadFile(inStream,SecBuf,GetOffestFremSid(vDirFat[i]),SectorSize);
		PDirectoryEntry pDirEntry = PDirectoryEntry(SecBuf);
		for (int j = 0;j<4;j++)
		{
				DirectoryEntry  tempDirEntry = pDirEntry[j];
				lDirList.push_back(tempDirEntry);

		}
	
	
		
		delete []SecBuf;
	}

	vector< vector<int> > vFatOfDirEntry;
	/*
	 *processing  DirectoryEntry list
	 */
	for(i = 0;i<lDirList.size();i++)
	{
		ProcessDirEntry(&lDirList[i],vMastList,vSsatList,vFatOfDirEntry,i)	;
	}
	


	delete []lpHeaderBuf;

	inStream.close();
	return 0;
}

bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& vFatOfDirEntry,int i)
{
	string DirName;
	unsigned int index;
	vector<int> FatOfDirEntry;
	if (pDirEntry->_cb == 0)
	{
		return false;
	}else
	{
		char buf[256] ={0,0};
		wcstombs(buf,(wchar_t *)pDirEntry->_ab,(size_t)pDirEntry->_cb);
		DirName = buf;
		
		/************************************************************************/
		/* 是否是短流                                                                     */
		/************************************************************************/
		if((pDirEntry->_ulSize < MaxMiniStreamSize)&&(pDirEntry->_mse != STGTY_ROOT)) 
		{
			FatOfDirEntry.push_back(pDirEntry->_sectStart);
			index = pDirEntry->_sectStart;
			while(sslist[index] != ENDOFCHAIN)
			{
				index=sslist[index];
				FatOfDirEntry.push_back(index);
			}
			FatOfDirEntry.push_back(sslist[index]);

		}else
		{
			FatOfDirEntry.push_back(pDirEntry->_sectStart);
			index = pDirEntry->_sectStart;
			while(slist[index] != ENDOFCHAIN)
			{
				index=slist[index];
				FatOfDirEntry.push_back(index);
			}
			FatOfDirEntry.push_back(slist[index]);
		}
		cout<<DirName<<'\t';
		for (int j =0; j < FatOfDirEntry.size();j++)
		{
			cout<<" \" \" <<DirName<<-FAT["<<j<<"] == ";
			cout<<hex<<FatOfDirEntry[j]<<'\t';
		}
		cout<<endl;
		vFatOfDirEntry.push_back(FatOfDirEntry);
		FatOfDirEntry.clear();
		
	}
	
	return true;

}



/*
 * Reads exactly `size` bytes from `inStream` into `buf`, starting at byte
 * offset `iReadOffest` from the beginning of the file.
 *
 * Returns true only when the seek succeeded and all `size` bytes were read.
 * After a short read the first inStream.gcount() bytes of `buf` are filled
 * and the rest is left untouched.
 */
bool IfReadFile(std::ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size)
{
	/* A previous short read leaves failbit set, which would make every later
	   seek/read fail silently; start each request from a clean state. */
	inStream.clear();
	inStream.seekg(static_cast<std::streamoff>(iReadOffest), std::ios::beg);
	if (!inStream)
	{
		return false;
	}
	inStream.read(reinterpret_cast<char *>(buf), static_cast<std::streamsize>(size));
	return static_cast<size_t>(inStream.gcount()) == size;
}

/*
 * Converts a sector id into the byte offset of that sector in the file.
 *
 * Sector 0 starts right after the header.  The header is 512 bytes, but in
 * files with larger sectors it is padded to a full sector, so the space to
 * skip is the larger of 512 and SectorSize.  For 512-byte sectors this is the
 * same as the previous fixed "+512".
 */
int GetOffestFremSid(SECT sid)
{
	const int headerSpan = (SectorSize > 512) ? SectorSize : 512;
	return sid*SectorSize+headerSpan;
}


/*
 * Prints the fields of the compound-document header to stdout and, as a side
 * effect, sets the globals SectorSize and miniSectorSize from the header's
 * shift fields.
 *
 * Returns false when a shift value is too large to describe a sector size
 * (the corresponding global is then set to 0), true otherwise.
 */
bool DumpDocHeader(PDocHeader pHeader)
{
	std::cout<<"\t The comdoc flag is\t{ ";
	for (int i = 0; i < 8; i++)
	{
		std::cout<<std::hex<<static_cast<int>(pHeader->_abSig[i])<<" ";
		if (i!=7)
		{
			std::cout<<',';
		}
	}
	std::cout<<'}'<<std::endl;
	if (pHeader->_uByteOrder == 0xFFFE)
	{
		std::cout<<"\t The file Byte order is Little-Endian"<<std::endl;
	}
	else
	{
		std::cout<<"\t The file Byte order mark is unexpected: "<<static_cast<unsigned int>(pHeader->_uByteOrder)<<std::endl;
	}
	std::cout.setf(std::ios::dec,std::ios::basefield);

	/* Sizes are exact powers of two: shift instead of going through floating-point pow(). */
	const unsigned int sectorShift = pHeader->_uSectorShift;
	const unsigned int miniSectorShift = pHeader->_uMiniSectorShift;
	const bool sectorShiftOk = sectorShift < 31;
	const bool miniShiftOk = miniSectorShift < 31;
	SectorSize = sectorShiftOk ? (1 << sectorShift) : 0;
	miniSectorSize = miniShiftOk ? (1 << miniSectorShift) : 0;

	std::cout<<"\t Size of a Sector in the compound document file is "<<SectorSize<<std::endl;
	std::cout<<"\t Size of a short-sector in the short-stream container stream is "<<miniSectorSize<<std::endl;
	std::cout<<"\t Total number of sectors used for the sector allocation table is "<<static_cast<DWORD>(pHeader->_csectFat)<<std::endl;
	std::cout<<"\t SecID of first sector of the directory stream is "<<static_cast<ULONG>(pHeader->_sectDirStart)<<std::endl;
	std::cout<<"\t Minimum size of a standard stream  is "<<static_cast<ULONG>(pHeader->_ulMiniSectorCutoff)<<std::endl;
	std::cout<<"\t SecID of first sector of the short-sector allocation table is "<<static_cast<ULONG>(pHeader->_sectMiniFatStart)<<std::endl;
	std::cout<<"\t Total number of sectors used for the short-sector allocation table is "<<static_cast<ULONG>(pHeader->_csectMiniFat)<<std::endl;
	std::cout<<"\t SecID of first sector of the master sector allocation table is "<<pHeader->_sectDifStart<<std::endl;
	std::cout<<"\t Total number of sectors used for the master sector allocation table is "<<static_cast<ULONG>(pHeader->_csectDif)<<std::endl;
	std::cout<<"\t First part of the master sector allocation table containing 109 SecIDs  is "<<std::endl;
	std::cout<<"\t {\t";
	/* The list ends at the first unused slot (0xFFFFFFFF), as in main. */
	for (int slot = 0; slot < 109; slot++)
	{
		if (pHeader->_sectFat[slot] == 0xFFFFFFFF)
		{
			break;
		}
		std::cout<<pHeader->_sectFat[slot]<<'\t';
		if ((slot + 1) % 20 == 0)
		{
			std::cout<<std::endl;	/* wrap after every 20 entries */
		}
	}
	std::cout<<'}'<<std::endl;
	return sectorShiftOk && miniShiftOk;
}



[课程]Linux pwn 探索篇!

上传的附件:
收藏
免费 7
支持
分享
最新回复 (2)
雪    币: 420
活跃值: (77)
能力值: ( LV13,RANK:500 )
在线值:
发帖
回帖
粉丝
2
有需要的下。。。
上传的附件:
2010-5-13 10:40
0
雪    币: 348
活跃值: (10)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
3
思路很清晰,用得着,谢谢分享
2010-5-13 11:08
0
游客
登录 | 注册 方可回帖
返回
//