/**************************************************************************** * * Copyright (c) 2000, 2001 * Machine Group * Software Research Lab. * Institute of Computing Tech. * Chinese Academy of Sciences * All rights reserved. * * This file is the confidential and proprietary property of * Institute of Computing Tech. and the posession or use of this file requires * a written license from the author. * Filename: SegGraph.h * Abstract: * interface for the Word Segmentation Directed Graph. * Author: Kevin Zhang * (zhanghp@software.ict.ac.cn) * Date: 2002-4-18 * * Notes: * * ****************************************************************************/ // SegGraph.h: interface for the CSegGraph class. // ////////////////////////////////////////////////////////////////////// #if !defined(AFX_SEGGRAPH_H__E051AD07_F18C_4171_94D6_39742104EB62__INCLUDED_) #define AFX_SEGGRAPH_H__E051AD07_F18C_4171_94D6_39742104EB62__INCLUDED_ #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 #include "..\\Utility\\Dictionary.h"//Dictionary #include "DynamicArray.h"//Dynamic array #define MAX_FREQUENCE 2079997//1993123+86874 #define MAX_SENTENCE_LEN 2000 class CSegGraph { public: bool GenerateWordArray(char *sSentence,CDictionary &dictCore); bool GenerateWordNet(char *sSentence,CDictionary &dictCore); //Generate the segmentation word net according the original sentence //sSentence: the sentence //dictCore: core dictionary CSegGraph(); virtual ~CSegGraph(); char m_sAtom[MAX_SENTENCE_LEN][WORD_MAXLENGTH];//pAtoms: the buffer for returned segmented atoms // Such as a Chinese Char, digit, single byte, or delimiters int m_nAtomLength[MAX_SENTENCE_LEN];//Save the individual length of atom in the array int m_nAtomPOS[MAX_SENTENCE_LEN];//pAtoms: the POS property unsigned int m_nAtomCount;//The count of atoms CDynamicArray m_segGraph; //segGraph: The segmentation word graph protected: bool AtomSegment(char *sSentence); //Segment the atomic members from the original sentence //sSentence: the sentence //pAtoms: the buffer for returned segmented atoms // Such as a Chinese Char, digit, single byte, or delimiters }; #endif // !defined(AFX_SEGGRAPH_H__E051AD07_F18C_4171_94D6_39742104EB62__INCLUDED_)