#include "stdafx.h"
#include "Span.h"
#include "..\\Segment\\Segment.h"
#include "..\\Utility\\Utility.h"
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
// Constructs an empty span in TT_NORMAL mode.
//
// The tag type is assigned first: the previous version tested m_tagType
// before it had been given any value, so the start tag depended on an
// indeterminate read. With the type fixed to TT_NORMAL the start tag is 0,
// which is what Reset(false) produces for TT_NORMAL as well.
CSpan::CSpan()
{
	m_tagType=TT_NORMAL;

	// Slot 0 is the start-of-span marker: one tag, zero cost, empty word.
	m_nTags[0][0]=0;
	m_nTags[0][1]=-1;	// -1 terminates the tag list of a slot
	m_dFrequency[0][0]=0;
	m_sWords[0][0]=0;

	m_nCurLength=1;		// only the start marker is present
	m_nUnknownIndex=0;
	m_nStartPos=0;
	m_nWordPosition[1]=0;
}
// Destructor. The body is empty; no explicit cleanup is performed here.
CSpan::~CSpan()
{
}
// Forward dynamic-programming pass over the span (Viterbi-style).
//
// For every position i>=1 and every candidate tag j at that position, picks
// the predecessor tag k at position i-1 that minimises
//     -log(GetContextPossibility(0, prevTag, curTag)) + m_dFrequency[i-1][k]
// stores its index in m_nBestPrev[i][j] and adds the minimum to
// m_dFrequency[i][j]. Tag lists are terminated by a negative entry.
//
// Fixes over the previous version:
//  * "no candidate chosen yet" is detected by comparing against the sentinel
//    itself instead of the unrelated literal 10, so a best predecessor whose
//    index is greater than 10 is no longer overwritten unconditionally;
//  * the running minimum is initialised, so a position whose predecessor has
//    no tags adds 0 instead of an indeterminate value.
//
// Always returns true.
bool CSpan::Disamb()
{
	const int nNoCandidate=MAX_POS_PER_WORD+1;	// sentinel: nothing selected yet

	for(int i=1;i<m_nCurLength;i++)
	{
		for(int j=0;m_nTags[i][j]>=0;j++)
		{
			int nMinCandidate=nNoCandidate;
			double dMinFee=0;
			for(int k=0;m_nTags[i-1][k]>=0;k++)
			{
				double dTmp=-log(m_context.GetContextPossibility(0,m_nTags[i-1][k],m_nTags[i][j]));
				dTmp+=m_dFrequency[i-1][k];
				// The first predecessor is always taken; later ones only if strictly cheaper.
				if(nMinCandidate==nNoCandidate||dTmp<dMinFee)
				{
					nMinCandidate=k;
					dMinFee=dTmp;
				}
			}
			// NOTE(review): if the previous position had no tags the sentinel is
			// stored here, exactly as before; confirm callers never follow it.
			m_nBestPrev[i][j]=nMinCandidate;
			m_dFrequency[i][j]=m_dFrequency[i][j]+dMinFee;
		}
	}
	return true;
}
// Re-initialises the span so that only the start marker (slot 0) remains.
//
// bContinue == true : slot 0 inherits the first tag and its cost from the last
//                     slot of the span just processed; m_nStartPos and
//                     m_nUnknownIndex are kept.
// bContinue == false: slot 0 gets the start tag (100 when the tag type is not
//                     TT_NORMAL, otherwise 0) with zero cost, and the unknown
//                     word counter and start position are cleared.
// Always returns true.
bool CSpan::Reset(bool bContinue)
{
	if(bContinue)
	{
		const int nLast=m_nCurLength-1;
		m_nTags[0][0]=m_nTags[nLast][0];
		m_dFrequency[0][0]=m_dFrequency[nLast][0];
	}
	else
	{
		m_nTags[0][0]=(m_tagType!=TT_NORMAL)?100:0;
		m_nUnknownIndex=0;
		m_dFrequency[0][0]=0;
		m_nStartPos=0;
	}

	// Common tail: single tag in slot 0, empty word, length 1.
	m_nTags[0][1]=-1;
	m_nCurLength=1;
	m_nWordPosition[1]=m_nStartPos;
	m_sWords[0][0]=0;
	return true;
}
// Loads the context data used by the tagger from sFilename.
// Delegates to m_context.Load and returns its result unchanged.
bool CSpan::LoadContext(char *sFilename)
{
return m_context.Load(sFilename);
}
// Scans the best-tag sequence for runs of the form  b (c|d)*  and records each
// run as an unknown word.
//
// Each tag n in m_nBestTag[1..] (terminated by a negative value) is encoded as
// the character 'a'+n, so tag 1 -> 'b', 2 -> 'c', 3 -> 'd'. For every run the
// start/end character positions are taken from m_nWordPosition and appended to
// m_nUnknownWords, advancing m_nUnknownIndex.
//
// Fixes over the previous version:
//  * the loop counter is declared outside the for statement, because it is
//    used after the loop (a for-init declaration is out of scope there in
//    ISO C++);
//  * the fill of sPOS is bounded so the terminator always fits;
//  * characters are no longer compared against NULL.
//
// NOTE(review): m_nUnknownIndex is not range-checked here; the capacity of
// m_nUnknownWords is not visible in this file.
//
// Always returns true.
bool CSpan::UnknownMatch()
{
	char sPOS[MAX_WORDS_PER_SENTENCE]="z";
	int i=1;
	for(;i<MAX_WORDS_PER_SENTENCE-1&&m_nBestTag[i]>-1;i++)
		sPOS[i]=m_nBestTag[i]+'a';
	sPOS[i]=0;

	char *pFind=strchr(sPOS+1,'b');
	while(pFind!=NULL)
	{
		// Extend the run over any following 'c'/'d'; the terminator stops it.
		char *pFindEnd=pFind+1;
		while(*pFindEnd=='c'||*pFindEnd=='d')
			pFindEnd=pFindEnd+1;

		const int nStart=(int)(pFind-sPOS);
		const int nEnd=(int)(pFindEnd-sPOS);
		m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[nStart];
		m_nUnknownWords[m_nUnknownIndex++][1]=m_nWordPosition[nEnd];

		pFind=strchr(pFindEnd,'b');
	}
	return true;
}
// Runs Disamb() and then walks the back-pointers from the last slot down to
// slot 1, writing the selected tag of every non-empty word into m_nBestTag.
// The walk starts from tag index 0 of the last slot. Afterwards m_nBestTag is
// terminated with -1: at m_nCurLength-1 when the last slot holds an empty
// word, otherwise at m_nCurLength. Always returns true.
bool CSpan::GetBestPOS()
{
	Disamb();

	int nTagIdx=0;
	int nSlot=m_nCurLength-1;
	while(nSlot>0)
	{
		if(m_sWords[nSlot][0]!=0)
			m_nBestTag[nSlot]=m_nTags[nSlot][nTagIdx];
		nTagIdx=m_nBestPrev[nSlot][nTagIdx];	// follow the back-pointer
		--nSlot;
	}

	const int nTerminator=(m_sWords[m_nCurLength-1][0]==0)?(m_nCurLength-1):m_nCurLength;
	m_nBestTag[nTerminator]=-1;
	return true;
}
// Splits every word whose best tag is 21 or 22 into two consecutive words.
//
// Walking from the last slot down to slot 1, each such word is replaced by a
// first part (kept in slot i) and a last part (inserted at slot i+1); all
// later slots are shifted up by one and m_nCurLength grows by one.
//
//  * tag 21: the last part is the final 4 bytes of the word when the word is
//            longer than 4 bytes and unlistDict knows those 4 bytes, otherwise
//            the final 2 bytes. The parts are tagged 11 and 1.
//  * tag 22: the first part is the leading 4 bytes when the word is longer
//            than 4 bytes and unlistDict knows them, otherwise the leading 2
//            bytes. The first part is tagged 1, 4 or 3 depending on the
//            dictionary and the preceding words; the last part is tagged 12.
//
// Fix over the previous version: the look-back at m_sWords[i-2] is only done
// when i>=2. For i==1 it used to index slot -1; a missing slot is now treated
// as "not in the dictionary".
//
// NOTE(review): sFirstPart/sLastPart are 50 bytes and filled with unbounded
// copies, and words shorter than 2 bytes would make nLenWord-2 wrap; neither is
// changed here because the word-size limits are not visible in this file.
//
// Always returns true.
bool CSpan::SplitPersonPOS(CDictionary &unlistDict)
{
	int i=m_nCurLength-1,j;
	unsigned int nLenWord,nLenPart;
	char sFirstPart[50],sLastPart[50];
	int nFirstPOS,nLastPOS;
	for(;i>0;i--)
	{
		if(m_nBestTag[i]==21||m_nBestTag[i]==22)
		{
			// Open a gap at i+1 by shifting every later slot up by one.
			for(j=m_nCurLength-1;j>i;j--)
			{
				strcpy(m_sWords[j+1],m_sWords[j]);
				m_nBestTag[j+1]=m_nBestTag[j];
				m_nWordPosition[j+1]=m_nWordPosition[j];
			}
			m_nCurLength+=1;
			if(m_nBestTag[i]==21)
			{
				// Choose the tail: 4 bytes if known to the dictionary, else 2 bytes.
				nLenWord=strlen(m_sWords[i]);
				if(nLenWord>4)
				{
					strcpy(sLastPart,m_sWords[i]+nLenWord-4);
					if(!unlistDict.IsExist(sLastPart,-1))
						strcpy(sLastPart,m_sWords[i]+nLenWord-2);
				}
				else
				{
					strcpy(sLastPart,m_sWords[i]+nLenWord-2);
				}
				nLenPart=strlen(sLastPart);
				if(nLenPart<nLenWord)
				{
					strncpy(sFirstPart,m_sWords[i],nLenWord-nLenPart);
					sFirstPart[nLenWord-nLenPart]=0;
				}
				else
				{
					// The tail covered the whole word: fall back to a 2-byte tail.
					strncpy(sFirstPart,m_sWords[i],nLenWord-2);
					sFirstPart[nLenWord-2]=0;
					strncpy(sLastPart,m_sWords[i]+nLenWord-2,2);
					sLastPart[2]=0;
				}
				nFirstPOS=11;
				nLastPOS=1;
			}
			else
			{
				// Choose the head: 4 bytes if known to the dictionary, else 2 bytes.
				nLenWord=strlen(m_sWords[i]);
				if(nLenWord>4)
				{
					strncpy(sFirstPart,m_sWords[i],4);
					sFirstPart[4]=0;
					if(!unlistDict.IsExist(sFirstPart,-1))
						sFirstPart[2]=0;
				}
				else
				{
					strncpy(sFirstPart,m_sWords[i],2);
					sFirstPart[2]=0;
				}
				nLenPart=strlen(sFirstPart);
				if(nLenPart<nLenWord)
				{
					strncpy(sLastPart,m_sWords[i]+nLenPart,nLenWord-nLenPart);
					sLastPart[nLenWord-nLenPart]=0;
				}
				else
				{
					// The head covered the whole word: fall back to a 2-byte head.
					strncpy(sFirstPart,m_sWords[i],2);
					sFirstPart[2]=0;
					strncpy(sLastPart,m_sWords[i]+2,nLenWord-2);
					sLastPart[nLenWord-2]=0;
				}
				if(unlistDict.IsExist(sFirstPart,1)&&m_nBestTag[i-1]==5)
					nFirstPOS=1;
				else if(unlistDict.IsExist(m_sWords[i-1],1)&&(i<2||!unlistDict.IsExist(m_sWords[i-2],1)))
					nFirstPOS=4;	// slot i-2 is only consulted when it exists
				else
					nFirstPOS=3;
				nLastPOS=12;
			}
			// Store the two parts; the second starts right after the first.
			strcpy(m_sWords[i],sFirstPart);
			m_nBestTag[i]=nFirstPOS;
			strcpy(m_sWords[i+1],sLastPart);
			m_nBestTag[i+1]=nLastPOS;
			m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(sFirstPart);
		}
	}
	return true;
}
// Matches the best-tag sequence against a fixed list of tag patterns and
// records every accepted match as an unknown (person-name) word.
//
// Each tag n in m_nBestTag[1..] (terminated by a negative value) is encoded as
// the character 'A'+n, so the pattern letters B,C,D,... stand for tags 1,2,3,...
// Patterns are tried in table order at each position j. For an accepted match
// the character range is appended to m_nUnknownWords and its score
//     log(dFactor[k]) + ComputePossibility(j, nPatternLen[k], personDict)
// to m_dWordsPossibility, and j jumps past the match.
//
// Fixes over the previous version:
//  * the loop counter i is declared outside the for statement, because it is
//    used after the loop (a for-init declaration is out of scope there in
//    ISO C++);
//  * the fill of sPOS is bounded so the terminator always fits.
//
// NOTE(review): several exclusion rules advance j and then `continue` the
// pattern loop, so the remaining patterns are tried at the NEW position; this
// is kept exactly as it was. m_nUnknownIndex is not range-checked.
//
// Always returns true.
bool CSpan::PersonRecognize(CDictionary &personDict)
{
	char sPOS[MAX_WORDS_PER_SENTENCE]="z",sPersonName[100];
	// Pattern table; the empty string (length 0) terminates it.
	char sPatterns[][5]={ "BBCD","BBC","BBE","BBZ","BCD","BEE","BE",
		"BG", "BXD","BZ", "CDCD","CD","EE",
		"FB", "Y","XD",""};
	// Per-pattern weight, parallel to sPatterns. The 0 for "CDCD" is never
	// passed to log(): that pattern always leaves through an exclusion rule.
	double dFactor[]={0.0011,0.0011,0.0011,0.0011,0.7614,0.0011,0.2055,
		0.0160,0.0011,0.0011,0,0.0160,0.0011,
		0.0160,0.0011,0.0011,0
	};
	// Pattern lengths, parallel to sPatterns.
	int nPatternLen[]={4,3,3,3,3,3,2,2,3,2,4,2,2,2,1,2,0};

	int i=1;
	for(;i<MAX_WORDS_PER_SENTENCE-1&&m_nBestTag[i]>-1;i++)
		sPOS[i]=m_nBestTag[i]+'A';
	sPOS[i]=0;

	int j=1,k,nPos;
	int nLittleFreqCount;
	bool bMatched=false;
	while(j<i)
	{
		bMatched=false;
		for(k=0;!bMatched&&nPatternLen[k]>0;k++)
		{
			// The pattern must match here and must not touch a "·" on either side.
			if(strncmp(sPatterns[k],sPOS+j,nPatternLen[k])==0&&strcmp(m_sWords[j-1],"·")!=0&&strcmp(m_sWords[j+nPatternLen[k]],"·")!=0)
			{
				// "FB" is rejected when followed by tag E, C or G.
				if(strcmp(sPatterns[k],"FB")==0&&(sPOS[j+2]=='E'||sPOS[j+2]=='C'||sPOS[j+2]=='G'))
				{
					continue;
				}
				// Build the candidate name and count its low-frequency parts.
				nPos=j;
				sPersonName[0]=0;
				nLittleFreqCount=0;
				while(nPos<j+nPatternLen[k])
				{
					if(m_nBestTag[nPos]<4&&personDict.GetFrequency(m_sWords[nPos],m_nBestTag[nPos])<LITTLE_FREQUENCY)
						nLittleFreqCount++;
					strcat(sPersonName,m_sWords[nPos]);
					nPos+=1;
				}
				if(IsAllForeign(sPersonName)&&personDict.GetFrequency(m_sWords[j],1)<LITTLE_FREQUENCY)
				{
					j+=nPatternLen[k]-1;
					continue;
				}
				if(strcmp(sPatterns[k],"CDCD")==0)
				{
					if(GetForeignCharCount(sPersonName)>0)
						j+=nPatternLen[k]-1;
					continue;
				}
				if(strcmp(sPatterns[k],"CD")==0&&IsAllForeign(sPersonName))
				{
					j+=nPatternLen[k]-1;
					continue;
				}
				if(nLittleFreqCount==nPatternLen[k]||nLittleFreqCount==3)
					continue;
				// Accepted: record range and score, then skip past the match.
				m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[j];
				m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[j+nPatternLen[k]];
				m_dWordsPossibility[m_nUnknownIndex]=log(dFactor[k])+ComputePossibility(j,nPatternLen[k],personDict);
				m_nUnknownIndex+=1;
				j+=nPatternLen[k];
				bMatched=true;
			}
		}
		if(!bMatched)
			j+=1;
	}
	return true;
}
// Fills the span's per-slot arrays (m_sWords, m_nWordPosition, m_nTags,
// m_dFrequency) from pWordItems, starting at item nIndex and slot 1.
//
// Filling stops when an item with an empty sWord is reached, when
// MAX_WORDS_PER_SENTENCE slots have been visited, or right after a slot that
// ended up with exactly one candidate tag. m_nCurLength is set to the number
// of slots in use and m_nStartPos to the character position after the last
// word copied.
//
// Parameters:
//   pWordItems  - word items, terminated by an item whose sWord is empty.
//   nIndex      - index of the first item to read.
//   dictCore    - dictionary used for candidate tags in TT_NORMAL mode and for
//                 the extra tag 0 in the other modes.
//   dictUnknown - dictionary used for candidate tags when the tag type is not
//                 TT_NORMAL.
//
// Returns the index of the item after the one the scan stopped on, or -1 when
// the scan stopped on the terminating (empty) item.
int CSpan::GetFrom(PWORD_RESULT pWordItems,int nIndex,CDictionary &dictCore, CDictionary &dictUnknown)
{
int nCount,aPOS[MAX_POS_PER_WORD],aFreq[MAX_POS_PER_WORD];
int nFreq=0,j,nRetPos=0,nWordsIndex=0;
// bSplit is true while the second half of a split word is still pending.
bool bSplit=false;
int i=1;
nWordsIndex=i+nIndex-1;
for(;i<MAX_WORDS_PER_SENTENCE&&pWordItems[nWordsIndex].sWord[0]!=0;i++)
{
// Step 1: copy the word (or one half of it) into slot i.
if(m_tagType==TT_NORMAL||!dictUnknown.IsExist(pWordItems[nWordsIndex].sWord,44))
{
strcpy(m_sWords[i],pWordItems[nWordsIndex].sWord);
m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
}
else
{
// The word is in dictUnknown with handle 44 (NOTE(review): meaning of 44 is
// not visible here): it is spread over two slots, first its leading 2 bytes,
// then on the next iteration the remainder.
if(!bSplit)
{
strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord,2);
m_sWords[i][2]=0;
bSplit=true;
}
else
{
unsigned int nLen=strlen(pWordItems[nWordsIndex].sWord+2);
strncpy(m_sWords[i],pWordItems[nWordsIndex].sWord+2,nLen);
m_sWords[i][nLen]=0;
bSplit=false;
}
m_nWordPosition[i+1]=m_nWordPosition[i]+strlen(m_sWords[i]);
}
m_nStartPos=m_nWordPosition[i+1];
// Step 2: collect the candidate tags and their costs for slot i.
if(m_tagType!=TT_NORMAL)
{
// Candidates from dictUnknown; j ends up as the number of tags found.
dictUnknown.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
for(j=0;j<nCount;j++)
{
m_nTags[i][j]=aPOS[j];
m_dFrequency[i][j]=-log((double)(1+aFreq[j]))+log((double)(m_context.GetFrequency(0,aPOS[j])+1));
}
// If the core dictionary knows the word, add tag 0 using the sum of all its
// core frequencies.
dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
nFreq=0;
for(int k=0;k<nCount;k++)
{
nFreq+=aFreq[k];
}
if(nCount>0)
{
m_nTags[i][j]=0;
m_dFrequency[i][j]=-log((double)(1+nFreq))+log((double)(m_context.GetFrequency(0,0)+1));
j++;
}
}
else
{
j=0;
if(pWordItems[nWordsIndex].nHandle>0)
{
// A positive handle is used as the only candidate tag; its cost is clamped
// at 0 from below.
m_nTags[i][j]=pWordItems[nWordsIndex].nHandle;
m_dFrequency[i][j]=pWordItems[nWordsIndex].dValue-log(MAX_FREQUENCE)+log((double)(m_context.GetFrequency(0,m_nTags[i][j])+1));
if(m_dFrequency[i][j]<0)
m_dFrequency[i][j]=0;
j++;
}
else
{
if(pWordItems[nWordsIndex].nHandle<0)
{
if(pWordItems[nWordsIndex].nHandle==-'t'*256-'t')
{
// Handle -('t'*256+'t'): offer the two tags 'n'*256+'r' and 'n'*256+'s',
// weighted by dRatio and 1-dRatio; dRatio drops to 0.01 when PostfixSplit
// reports a non-empty postfix.
char sWordOrg[100],sPostfix[10];
double dRatio=0.6925;
PostfixSplit(pWordItems[nWordsIndex].sWord,sWordOrg,sPostfix);
if(sPostfix[0]!=0)
dRatio=0.01;
m_nTags[i][j]='n'*256+'r';
m_dFrequency[i][j]=-log(dRatio)+pWordItems[nWordsIndex].dValue;
j++;
m_nTags[i][j]='n'*256+'s';
m_dFrequency[i][j]=-log(1-dRatio)+pWordItems[nWordsIndex].dValue;
j++;
}
else
{
// Any other negative handle: its negation is the candidate tag.
m_nTags[i][j]=-pWordItems[nWordsIndex].nHandle;
m_dFrequency[i][j++]=pWordItems[nWordsIndex].dValue;
}
}
// Append tags from the core dictionary. The loop continues from the current
// j, so dictionary entries with an index below j are not used.
dictCore.GetHandle(m_sWords[i],&nCount,aPOS,aFreq);
for(;j<nCount;j++)
{
m_nTags[i][j]=aPOS[j];
m_dFrequency[i][j]=-log(1+aFreq[j])+log(m_context.GetFrequency(0,m_nTags[i][j])+1);
}
}
}
// No candidate at all: let GuessPOS propose tags for this slot.
if(j==0)
{
GuessPOS(i,&j);
}
// Terminate the tag list of slot i.
m_nTags[i][j]=-1;
// Exactly one candidate: close the span after this slot with an empty word.
if(j==1)
{
i++;
m_sWords[i][0]=0;
break;
}
// Stay on the same item while the second half of a split word is pending.
if(!bSplit)
nWordsIndex++;
}
if(pWordItems[nWordsIndex].sWord[0]==0)
nRetPos=-1;
// If the last filled slot has more than one tag, append an end marker slot
// (tag 101 when the tag type is not TT_NORMAL, otherwise 1) with zero cost and
// an empty word.
if(m_nTags[i-1][1]!=-1)
{
if(m_tagType!=TT_NORMAL)
m_nTags[i][0]=101;
else
m_nTags[i][0]=1;
m_dFrequency[i][0]=0;
m_sWords[i][0]=0;
m_nTags[i++][1]=-1;
}
m_nCurLength=i;
if(nRetPos!=-1)
return nWordsIndex+1;
return -1;
}
// Stores nType in m_tagType. The tag type is read by Reset, GetFrom, GuessPOS
// and POSTagging to choose their mode-specific behaviour.
void CSpan::SetTagType(enum TAG_TYPE nType)
{
m_tagType=nType;
}
// Tags pWordItems span by span.
//
// After an initial Reset(false), the loop repeatedly pulls one span with
// GetFrom, resolves it with GetBestPOS and then, depending on m_tagType:
//   TT_NORMAL - writes the chosen tag back into each item's nHandle and, for
//               items with a positive dValue that dictCore knows, recomputes
//               dValue from the dictionary frequency of that tag;
//   TT_PERSON - runs SplitPersonPOS and PersonRecognize on dictUnknown;
//   TT_PLACE  - runs PlaceRecognize;
//   TT_TRANS  - runs TransRecognize.
// Each pass ends with Reset() (default argument). The loop stops when GetFrom
// returns -1 or the next item has an empty word. Always returns true.
bool CSpan::POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown)
{
	int nNext=0;
	int nSlot;
	int nSpanStart;

	Reset(false);
	while(nNext>-1&&pWordItems[nNext].sWord[0]!=0)
	{
		nSpanStart=nNext;
		nNext=GetFrom(pWordItems,nSpanStart,dictCore,dictUnknown);
		GetBestPOS();

		switch(m_tagType)
		{
		case TT_NORMAL:
			for(nSlot=1;m_nBestTag[nSlot]!=-1&&nSlot<m_nCurLength;nSlot+=1)
			{
				// Slot nSlot of the span corresponds to this item of the input.
				const int nItem=nSlot+nSpanStart-1;
				pWordItems[nItem].nHandle=m_nBestTag[nSlot];
				if(pWordItems[nItem].dValue>0&&dictCore.IsExist(pWordItems[nItem].sWord,-1))
					pWordItems[nItem].dValue=log(MAX_FREQUENCE)-log(dictCore.GetFrequency(pWordItems[nItem].sWord,m_nBestTag[nSlot])+1);
			}
			break;
		case TT_PERSON:
			SplitPersonPOS(dictUnknown);
			PersonRecognize(dictUnknown);
			break;
		case TT_PLACE:
			PlaceRecognize(dictCore,dictUnknown);
			break;
		case TT_TRANS:
			TransRecognize(dictCore,dictUnknown);
			break;
		default:
			break;
		}

		Reset();
	}
	return true;
}
// Proposes candidate tags for the word in slot nIndex when no dictionary
// supplied any, writing them to m_nTags[nIndex][0..] and m_dFrequency[nIndex][0..]
// and returning their count through *pSubIndex. The tag list is NOT terminated
// here; the caller does that.
//
// Every guess has the value 1/(GetFrequency(0,tag)*scale+bias). Which tags are
// offered depends on m_tagType, on the byte length of the word and on a few
// character-class tests; TT_NORMAL and unrecognised tag types offer nothing.
// The guesses are described by small constant tables which are selected first
// and then written out in order by a single loop. Always returns true.
bool CSpan::GuessPOS(int nIndex,int *pSubIndex)
{
	// One guess: tag plus the scale/bias of its divisor. nTag<0 ends a table.
	struct SGuess { int nTag; int nScale; int nBias; };

	static const SGuess aTag0[]={{0,1,1},{-1,0,0}};
	static const SGuess aTag6[]={{6,1,1},{-1,0,0}};
	static const SGuess aTags1to4[]={{1,1,1},{2,1,1},{3,1,1},{4,1,1},{-1,0,0}};
	static const SGuess aTags11to13[]={{11,8,0},{12,8,0},{13,8,0},{-1,0,0}};
	static const SGuess aTags41to43[]={{41,8,0},{42,8,0},{43,8,0},{-1,0,0}};
	static const SGuess aTransLetter[]={{1,1,1},{11,1,1},{2,2,1},{3,2,1},{12,2,1},{13,2,1},{-1,0,0}};
	static const SGuess aTransTwoByte[]={{1,2,1},{2,2,1},{3,2,1},{30,8,1},
		{11,4,1},{12,4,1},{13,4,1},{21,2,1},{22,2,1},{23,2,1},{-1,0,0}};

	const SGuess *apGroup[5];	// at most four tables are ever selected
	int nGroups=0;
	int nCharType;
	unsigned int nLen;

	switch(m_tagType)
	{
	case TT_PERSON:
		if(CC_Find("××",m_sWords[nIndex]))
		{
			apGroup[nGroups++]=aTag6;
			break;
		}
		apGroup[nGroups++]=aTag0;
		nLen=strlen(m_sWords[nIndex]);
		if(nLen>=4)
		{
			apGroup[nGroups++]=aTag0;	// tag 0 is offered a second time
			apGroup[nGroups++]=aTags11to13;
		}
		else if(nLen==2)
		{
			apGroup[nGroups++]=aTag0;	// tag 0 is offered a second time
			nCharType=charType((unsigned char *)m_sWords[nIndex]);
			if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
				apGroup[nGroups++]=aTags1to4;
			apGroup[nGroups++]=aTags11to13;
		}
		break;

	case TT_PLACE:
		apGroup[nGroups++]=aTag0;
		nLen=strlen(m_sWords[nIndex]);
		if(nLen>=4)
		{
			apGroup[nGroups++]=aTags11to13;
		}
		else if(nLen==2)
		{
			apGroup[nGroups++]=aTag0;	// tag 0 is offered a second time
			nCharType=charType((unsigned char *)m_sWords[nIndex]);
			if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
				apGroup[nGroups++]=aTags1to4;
			apGroup[nGroups++]=aTags11to13;
		}
		break;

	case TT_TRANS:
		nLen=strlen(m_sWords[nIndex]);
		apGroup[nGroups++]=aTag0;
		if(!IsAllChinese((unsigned char *)m_sWords[nIndex]))
		{
			if(IsAllLetter((unsigned char *)m_sWords[nIndex]))
				apGroup[nGroups++]=aTransLetter;
			apGroup[nGroups++]=aTags41to43;
		}
		else if(nLen>=4)
		{
			apGroup[nGroups++]=aTags41to43;
		}
		else if(nLen==2)
		{
			nCharType=charType((unsigned char *)m_sWords[nIndex]);
			if(nCharType==CT_OTHER||nCharType==CT_CHINESE)
				apGroup[nGroups++]=aTransTwoByte;
			apGroup[nGroups++]=aTags41to43;
		}
		break;

	case TT_NORMAL:
	default:
		break;
	}

	// Write the selected guesses into the slot, in table order.
	int nOut=0;
	for(int g=0;g<nGroups;g++)
	{
		for(const SGuess *pGuess=apGroup[g];pGuess->nTag>=0;pGuess++)
		{
			m_nTags[nIndex][nOut]=pGuess->nTag;
			m_dFrequency[nIndex][nOut]=(double)1/(double)(m_context.GetFrequency(0,pGuess->nTag)*pGuess->nScale+pGuess->nBias);
			nOut++;
		}
	}
	*pSubIndex=nOut;
	return true;
}
// Sums, over the nLength slots starting at nStartPos, the term
//     log(contextFreq(bestTag)+1) - log(dictFreq(word,bestTag)+1)
// where contextFreq comes from m_context.GetFrequency(0,tag) and dictFreq from
// dict.GetFrequency. Returns 0 when nLength is not positive.
ELEMENT_TYPE CSpan::ComputePossibility(int nStartPos,int nLength,CDictionary &dict)
{
	ELEMENT_TYPE dTotal=0;
	int nSlot=nStartPos;
	while(nSlot<nStartPos+nLength)
	{
		const int nWordFreq=dict.GetFrequency(m_sWords[nSlot],m_nBestTag[nSlot]);
		const ELEMENT_TYPE dTerm=log((double)(m_context.GetFrequency(0,m_nBestTag[nSlot])+1))-log((double)(nWordFreq+1));
		dTotal+=dTerm;
		++nSlot;
	}
	return dTotal;
}
// Scans the best-tag sequence for runs that start with tag 1/11/21 or 2/12/22
// and records the accepted runs as unknown words.
//
// A run starting with tag t in {1,11,21} extends over following tags t, then
// t+1, then t+2, then 30; a run starting with t in {2,12,22} extends over t,
// then t+1, then 30. A run [nStart,nEnd) is accepted when its first word is not
// all numeric and
//   - it spans more than two slots, or
//   - it spans two slots and either the second is not tagged 30 or the first
//     word is longer than 2 bytes, or
//   - it spans one slot, the word is longer than 2 bytes and dictCore does not
//     know it.
// For accepted runs the character range goes to m_nUnknownWords and the score
// from ComputePossibility (called with length nEnd-nStart+1) to
// m_dWordsPossibility. Always returns true.
bool CSpan::TransRecognize(CDictionary &dictCore,CDictionary &transDict)
{
	int nStart=1;
	int nEnd=1;
	int i=1;
	while(m_nBestTag[i]>-1)
	{
		const int nHead=m_nBestTag[i];
		switch(nHead)
		{
		case 1:
		case 11:
		case 21:
			nStart=i;
			nEnd=i+1;
			while(m_nBestTag[nEnd]==nHead)
				nEnd++;
			while(m_nBestTag[nEnd]==nHead+1)
				nEnd++;
			while(m_nBestTag[nEnd]==nHead+2)
				nEnd++;
			while(m_nBestTag[nEnd]==30)
				nEnd++;
			break;
		case 2:
		case 12:
		case 22:
			nStart=i;
			nEnd=i+1;
			while(m_nBestTag[nEnd]==nHead)
				nEnd++;
			while(m_nBestTag[nEnd]==nHead+1)
				nEnd++;
			while(m_nBestTag[nEnd]==30)
				nEnd++;
			break;
		default:
			// Not a run start: nStart/nEnd keep their previous values.
			break;
		}

		// Acceptance test, evaluated lazily in the same order as the conditions above.
		bool bAccept=false;
		if(nEnd>nStart&&!IsAllNum((unsigned char *)m_sWords[nStart]))
		{
			const int nSpan=nEnd-nStart;
			if(nSpan>2)
				bAccept=true;
			else if(nSpan==2)
				bAccept=(m_nBestTag[nEnd-1]!=30||strlen(m_sWords[nStart])>2);
			else if(nSpan==1)
				bAccept=(strlen(m_sWords[nStart])>2&&!dictCore.IsExist(m_sWords[nStart],-1));
		}
		if(bAccept)
		{
			m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[nStart];
			m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[nEnd];
			m_dWordsPossibility[m_nUnknownIndex++]=ComputePossibility(nStart,nEnd-nStart+1,transDict);
			nStart=nEnd;	// marks the run as consumed
		}

		// Jump past the run if there is one ahead, otherwise step by one.
		i=(i<nEnd)?nEnd:(i+1);
	}
	return true;
}
// Scans the best-tag sequence for runs that start with tag 1 or 2 and records
// each of them as an unknown word.
//
// A run starting with tag 1 extends over following tags 1, then 2, then 3,
// then 4; a run starting with tag 2 extends over 2, then 3, then 4. For every
// run [nStart,nEnd) the character range goes to m_nUnknownWords and the score
// from ComputePossibility (called with length nEnd-nStart+1) to
// m_dWordsPossibility. dictCore is not used in this function.
// Always returns true.
bool CSpan::PlaceRecognize(CDictionary &dictCore,CDictionary &placeDict)
{
	int nStart=1;
	int nEnd=1;
	int i=1;
	while(m_nBestTag[i]>-1)
	{
		const int nHead=m_nBestTag[i];
		if(nHead==1||nHead==2)
		{
			nStart=i;
			nEnd=i+1;
			// Consume consecutive groups of tags nHead, nHead+1, ..., 4 in that order.
			for(int nTag=nHead;nTag<=4;nTag++)
			{
				while(m_nBestTag[nEnd]==nTag)
					nEnd++;
			}
		}

		if(nEnd>nStart)
		{
			m_nUnknownWords[m_nUnknownIndex][0]=m_nWordPosition[nStart];
			m_nUnknownWords[m_nUnknownIndex][1]=m_nWordPosition[nEnd];
			m_dWordsPossibility[m_nUnknownIndex++]=ComputePossibility(nStart,nEnd-nStart+1,placeDict);
			nStart=nEnd;	// marks the run as consumed
		}

		// Jump past the run if there is one ahead, otherwise step by one.
		i=(i<nEnd)?nEnd:(i+1);
	}
	return true;
}