/// Tests whether the masked byte pattern matches the memory located exactly at
/// `startAddr`.
///
/// Despite its name this routine does not scan forward: it compares the
/// pattern against a single candidate address and reports whether it matched.
///
/// For every index `i` listed in `vecIdx` the memory byte at `startAddr + i`
/// is OR-ed with `vecMsk[i]` and must then equal `vecPtn[i]`.
///
/// While each byte is compared, the corresponding byte of `vecPtn` is
/// temporarily incremented and restored afterwards, so the buffer behind
/// `vecPtn` is written to even though it is passed as `const`. Presumably this
/// prevents the pattern buffer itself from being reported as a match when it
/// lies inside the examined memory -- confirm with the author. The caller must
/// therefore pass a vector that is not stored in read-only memory.
///
/// @param startAddr  Address of the first byte of the candidate match.
/// @param searchSize Number of bytes available starting at `startAddr`.
/// @param vecPtn     Pattern bytes.
/// @param vecMsk     Per-byte masks OR-ed into the memory byte before comparing.
/// @param vecIdx     Indices into `vecPtn` / `vecMsk` of the bytes to compare.
/// @return `startAddr` if every indexed byte matches; 0 if any byte differs,
///         if the pattern or index list is empty, if `searchSize` is smaller
///         than the pattern, or if `startAddr + searchSize` wraps around.
/// @throws std::out_of_range if an index in `vecIdx` lies outside `vecPtn` or
///         `vecMsk`.
DLL_API inline ULONGLONG BFPatternFind(
    const ULONGLONG startAddr,
    const ULONGLONG searchSize,
    const std::vector<UCHAR>& vecPtn,
    const std::vector<UCHAR>& vecMsk,
    const std::vector<ULONG>& vecIdx)
{
    if (vecPtn.empty() || vecIdx.empty() || searchSize < vecPtn.size()) {
        return 0;
    }

    // Reject regions whose end address wraps around the address space.
    if (startAddr + searchSize < startAddr) {
        return 0;
    }

    const UCHAR* const memory =
        reinterpret_cast<const UCHAR*>(static_cast<size_t>(startAddr));

    for (const ULONG idx : vecIdx) {
        // Validate the index against both vectors before anything is written.
        const UCHAR expected = vecPtn.at(idx);
        const UCHAR mask = vecMsk.at(idx);

        // Perturb the pattern byte while the memory byte is read, then put the
        // original value back before deciding anything.
        UCHAR& patternByte = const_cast<UCHAR&>(vecPtn[idx]);
        patternByte = static_cast<UCHAR>(expected + 0x1);
        const UCHAR observed = memory[idx];
        patternByte = expected;

        if (static_cast<UCHAR>(observed | mask) != expected) {
            return 0;
        }
    }

    return startAddr;
}
/// Searches a memory region for occurrences of a masked byte pattern and
/// collects the address of every match.
///
/// The pattern text is converted by `InitPattern` (defined elsewhere) into
/// pattern bytes, per-byte masks and a list of byte indices to compare. A
/// position matches when, for every listed index `i`, the memory byte at
/// `position + i` OR-ed with the mask byte equals the pattern byte.
///
/// The region is processed in two phases:
///   1. 32 candidate positions at a time with AVX2, for as long as every
///      32-byte load stays inside the region;
///   2. one position at a time with `BFPatternFind` for what is left.
/// A match is only reported when the whole pattern fits inside the region,
/// and each matching position is reported once, in ascending address order.
///
/// @param retList         Receives `match address + offsetSize` for each match.
///                        Cleared once the arguments and pattern are accepted;
///                        left untouched when FALSE is returned.
/// @param searchStartAddr Anchor address of the region; must be non-zero.
/// @param searchSize      Region size in bytes; must be non-zero. If negative
///                        and `searchStartAddr` is greater than its magnitude,
///                        the region starts at `searchStartAddr - |searchSize|`;
///                        otherwise it starts at `searchStartAddr`.
/// @param myPattern       Pattern text handed to `InitPattern`.
/// @param offsetSize      Signed offset added to every reported address.
/// @param searchNum       Maximum number of results; 0 means unlimited.
/// @return FALSE if an argument is zero, `InitPattern` fails, or the pattern
///         has no bytes to compare; TRUE otherwise (also when nothing matched).
/// @throws std::out_of_range if `InitPattern` produced an index outside the
///         pattern or mask vectors.
DLL_API BOOL AVX2PatternFind256(
    std::vector<ULONGLONG>& retList,
    const ULONGLONG searchStartAddr,
    const LONGLONG searchSize,
    const std::string& myPattern,
    const LONGLONG offsetSize,
    const ULONGLONG searchNum)
{
    if (0 == searchStartAddr || 0 == searchSize) {
        return FALSE;
    }

    // Magnitude of the size, computed in unsigned arithmetic so that the most
    // negative LONGLONG value does not overflow.
    const ULONGLONG regionSize = (searchSize < 0)
        ? (0ULL - static_cast<ULONGLONG>(searchSize))
        : static_cast<ULONGLONG>(searchSize);

    ULONGLONG realStartAddr = searchStartAddr;
    if ((searchSize < 0) && (searchStartAddr > regionSize)) {
        realStartAddr = searchStartAddr - regionSize;
    }

    std::vector<UCHAR> vecPtn;
    vecPtn.reserve(16);
    std::vector<UCHAR> vecMsk;
    vecMsk.reserve(16);
    std::vector<ULONG> vecIdx;
    vecIdx.reserve(8);
    if (!InitPattern(myPattern, vecPtn, vecMsk, vecIdx)) {
        return FALSE;
    }
    // Without at least one byte to compare there is nothing to search for.
    if (vecPtn.empty() || vecIdx.empty()) {
        return FALSE;
    }

    // Broadcast every compared pattern/mask byte across a 256-bit register.
    // .at() validates each index once; later accesses can rely on that.
    const size_t cmpCount = vecIdx.size();
    std::vector<__m256i> m256VecPtn;
    m256VecPtn.reserve(cmpCount);
    std::vector<__m256i> m256VecMsk;
    m256VecMsk.reserve(cmpCount);
    for (const ULONG idx : vecIdx) {
        m256VecPtn.push_back(_mm256_set1_epi8(static_cast<char>(vecPtn.at(idx))));
        m256VecMsk.push_back(_mm256_set1_epi8(static_cast<char>(vecMsk.at(idx))));
    }

    retList.clear();
    retList.reserve(16);

    const ULONGLONG ptnLen = vecPtn.size();
    if (regionSize < ptnLen) {
        // The pattern cannot fit anywhere in the region.
        return TRUE;
    }
    // Highest offset at which the complete pattern still fits in the region.
    const ULONGLONG lastPos = regionSize - ptnLen;
    // Adding the offset in unsigned arithmetic wraps exactly like a signed add.
    const ULONGLONG addrDelta = static_cast<ULONGLONG>(offsetSize);

    // While the vector phase runs, the first compared byte of the local pattern
    // copy is incremented. Presumably this keeps the scan from reporting this
    // function's own pattern buffer should it lie inside the region -- confirm.
    const ULONG guardIdx = vecIdx[0];
    const UCHAR bakVecPtnCh = vecPtn[guardIdx];
    vecPtn[guardIdx] = static_cast<UCHAR>(bakVecPtnCh + 1);

    // Phase 1: blocks of 32 candidate positions pos .. pos + 31.
    // Requiring pos + 31 <= lastPos guarantees that the furthest byte loaded,
    // pos + (ptnLen - 1) + 31, is still inside the region and that every
    // candidate in the block can hold the full pattern.
    ULONGLONG pos = 0;
    while (lastPos >= 31 && pos <= lastPos - 31) {
        const UCHAR* const blockBase =
            reinterpret_cast<const UCHAR*>(static_cast<size_t>(realStartAddr + pos));

        // Bit n set <=> candidate position pos + n matched every byte so far.
        ULONG hits = 0xFFFFFFFF;
        for (size_t j = 0; j < cmpCount && hits != 0; ++j) {
            const __m256i memBytes = _mm256_loadu_si256(
                reinterpret_cast<const __m256i*>(blockBase + vecIdx[j]));
            const __m256i maskedBytes = _mm256_or_si256(memBytes, m256VecMsk[j]);
            const __m256i equalBytes = _mm256_cmpeq_epi8(m256VecPtn[j], maskedBytes);
            hits &= static_cast<ULONG>(_mm256_movemask_epi8(equalBytes));
        }

        ULONG lane = 0;
        while (_BitScanForward(&lane, hits)) {
            retList.push_back(realStartAddr + pos + lane + addrDelta);
            if (searchNum != 0 && retList.size() >= searchNum) {
                return TRUE;
            }
            hits &= hits - 1; // clear the lowest set bit
        }

        pos += 32;
    }

    // BFPatternFind compares against the unmodified pattern.
    vecPtn[guardIdx] = bakVecPtnCh;

    // Phase 2: the remaining (at most 31) candidate positions, one at a time.
    for (; pos <= lastPos; ++pos) {
        const ULONGLONG tailPtnAddr = BFPatternFind(
            realStartAddr + pos, regionSize - pos, vecPtn, vecMsk, vecIdx);
        if (tailPtnAddr) {
            retList.push_back(tailPtnAddr + addrDelta);
            if (searchNum != 0 && retList.size() >= searchNum) {
                return TRUE;
            }
        }
    }

    return TRUE;
}