| /** |
| ******************************************************************************* |
| * Copyright (C) 2006, International Business Machines Corporation and others. * |
| * All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| |
| #ifndef TRIEDICT_H |
| #define TRIEDICT_H |
| |
| #include "unicode/utypes.h" |
| #include "unicode/uobject.h" |
| #include "unicode/utext.h" |
| |
| struct UEnumeration; |
| struct UDataSwapper; |
| struct UDataMemory; |
| |
| /** |
| * <p>UDataSwapFn function for use in swapping a compact dictionary.</p> |
| * |
| * @param ds Pointer to UDataSwapper containing global data about the |
| * transformation and function pointers for handling primitive |
| * types. |
| * @param inData Pointer to the input data to be transformed or examined. |
| * @param length Length of the data, counting bytes. May be -1 for preflighting. |
| * If length>=0, then transform the data. |
| * If length==-1, then only determine the length of the data. |
| * The length cannot be determined from the data itself for all |
| * types of data (e.g., not for simple arrays of integers). |
| * @param outData Pointer to the output data buffer. |
| * If length>=0 (transformation), then the output buffer must |
| * have a capacity of at least length. |
| * If length==-1, then outData will not be used and can be NULL. |
| * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must |
| * fulfill U_SUCCESS on input. |
| * @return The actual length of the data. |
| * |
| * @see UDataSwapper |
| */ |
| |
| U_CAPI int32_t U_EXPORT2 |
| triedict_swap(const UDataSwapper *ds, |
| const void *inData, int32_t length, void *outData, |
| UErrorCode *pErrorCode); |
| |
| U_NAMESPACE_BEGIN |
| |
| class StringEnumeration; |
| struct CompactTrieHeader; |
| |
| /******************************************************************* |
| * TrieWordDictionary |
| */ |
| |
| /** |
| * <p>TrieWordDictionary is an abstract class that represents a word |
| * dictionary based on a trie. The base protocol is read-only. |
| * Subclasses may allow writing.</p> |
| */ |
| class U_COMMON_API TrieWordDictionary : public UMemory { |
| public: |
| |
| /** |
| * <p>Default constructor.</p> |
| * |
| */ |
| TrieWordDictionary(); |
| |
| /** |
| * <p>Virtual destructor.</p> |
| */ |
| virtual ~TrieWordDictionary(); |
| |
| /** |
| * <p>Find dictionary words that match the text.</p> |
| * |
| * @param text A UText representing the text. The |
| * iterator is left after the longest prefix match in the dictionary. |
| * @param start The current position in text. |
| * @param maxLength The maximum number of code units to match. |
| * @param lengths An array that is filled with the lengths of words that matched. |
| * @param count Filled with the number of elements output in lengths. |
| * @param limit The size of the lengths array; this limits the number of words output. |
| * @return The number of characters in text that were matched. |
| */ |
| virtual int32_t matches( UText *text, |
| int32_t maxLength, |
| int32_t *lengths, |
| int &count, |
| int limit ) const = 0; |
| |
| /** |
| * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> |
| * |
| * @param status A status code recording the success of the call. |
| * @return A StringEnumeration that will iterate through the whole dictionary. |
| * The caller is responsible for closing it. The order is unspecified. |
| */ |
| virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; |
| |
| }; |
| |
| /******************************************************************* |
| * MutableTrieDictionary |
| */ |
| |
| /** |
| * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be |
| * added.</p> |
| */ |
| |
| struct TernaryNode; // Forwards declaration |
| |
| class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary { |
| private: |
| /** |
| * The root node of the trie |
| * @internal |
| */ |
| |
| TernaryNode *fTrie; |
| |
| /** |
| * A UText for internal use |
| * @internal |
| */ |
| |
| UText *fIter; |
| |
| friend class CompactTrieDictionary; // For fast conversion |
| |
| public: |
| |
| /** |
| * <p>Constructor.</p> |
| * |
| * @param median A UChar around which to balance the trie. Ideally, it should |
| * begin at least one word that is near the median of the set in the dictionary |
| * @param status A status code recording the success of the call. |
| */ |
| MutableTrieDictionary( UChar median, UErrorCode &status ); |
| |
| /** |
| * <p>Virtual destructor.</p> |
| */ |
| virtual ~MutableTrieDictionary(); |
| |
| /** |
| * <p>Find dictionary words that match the text.</p> |
| * |
| * @param text A UText representing the text. The |
| * iterator is left after the longest prefix match in the dictionary. |
| * @param maxLength The maximum number of code units to match. |
| * @param lengths An array that is filled with the lengths of words that matched. |
| * @param count Filled with the number of elements output in lengths. |
| * @param limit The size of the lengths array; this limits the number of words output. |
| * @return The number of characters in text that were matched. |
| */ |
| virtual int32_t matches( UText *text, |
| int32_t maxLength, |
| int32_t *lengths, |
| int &count, |
| int limit ) const; |
| |
| /** |
| * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> |
| * |
| * @param status A status code recording the success of the call. |
| * @return A StringEnumeration that will iterate through the whole dictionary. |
| * The caller is responsible for closing it. The order is unspecified. |
| */ |
| virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| |
| /** |
| * <p>Add one word to the dictionary.</p> |
| * |
| * @param word A UChar buffer containing the word. |
| * @param length The length of the word. |
| * @param status The resultant status |
| */ |
| virtual void addWord( const UChar *word, |
| int32_t length, |
| UErrorCode &status); |
| |
| #if 0 |
| /** |
| * <p>Add all strings from a UEnumeration to the dictionary.</p> |
| * |
| * @param words A UEnumeration that will return the desired words. |
| * @param status The resultant status |
| */ |
| virtual void addWords( UEnumeration *words, UErrorCode &status ); |
| #endif |
| |
| protected: |
| /** |
| * <p>Search the dictionary for matches.</p> |
| * |
| * @param text A UText representing the text. The |
| * iterator is left after the longest prefix match in the dictionary. |
| * @param maxLength The maximum number of code units to match. |
| * @param lengths An array that is filled with the lengths of words that matched. |
| * @param count Filled with the number of elements output in lengths. |
| * @param limit The size of the lengths array; this limits the number of words output. |
| * @param parent The parent of the current node |
| * @param pMatched The returned parent node matched the input |
| * @return The number of characters in text that were matched. |
| */ |
| virtual int32_t search( UText *text, |
| int32_t maxLength, |
| int32_t *lengths, |
| int &count, |
| int limit, |
| TernaryNode *&parent, |
| UBool &pMatched ) const; |
| |
| private: |
| /** |
| * <p>Private constructor. The root node it not allocated.</p> |
| * |
| * @param status A status code recording the success of the call. |
| */ |
| MutableTrieDictionary( UErrorCode &status ); |
| }; |
| |
| /******************************************************************* |
| * CompactTrieDictionary |
| */ |
| |
| /** |
| * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted |
| * to save space.</p> |
| */ |
| class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { |
| private: |
| /** |
| * The root node of the trie |
| */ |
| |
| const CompactTrieHeader *fData; |
| |
| /** |
| * A UBool indicating whether or not we own the fData. |
| */ |
| |
| UBool fOwnData; |
| |
| UDataMemory *fUData; |
| public: |
| /** |
| * <p>Construct a dictionary from a UDataMemory.</p> |
| * |
| * @param data A pointer to a UDataMemory, which is adopted |
| * @param status A status code giving the result of the constructor |
| */ |
| CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); |
| |
| /** |
| * <p>Construct a dictionary from raw saved data.</p> |
| * |
| * @param data A pointer to the raw data, which is still owned by the caller |
| * @param status A status code giving the result of the constructor |
| */ |
| CompactTrieDictionary(const void *dataObj, UErrorCode &status); |
| |
| /** |
| * <p>Construct a dictionary from a MutableTrieDictionary.</p> |
| * |
| * @param dict The dictionary to use as input. |
| * @param status A status code recording the success of the call. |
| */ |
| CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status ); |
| |
| /** |
| * <p>Virtual destructor.</p> |
| */ |
| virtual ~CompactTrieDictionary(); |
| |
| /** |
| * <p>Find dictionary words that match the text.</p> |
| * |
| * @param text A UText representing the text. The |
| * iterator is left after the longest prefix match in the dictionary. |
| * @param maxLength The maximum number of code units to match. |
| * @param lengths An array that is filled with the lengths of words that matched. |
| * @param count Filled with the number of elements output in lengths. |
| * @param limit The size of the lengths array; this limits the number of words output. |
| * @return The number of characters in text that were matched. |
| */ |
| virtual int32_t matches( UText *text, |
| int32_t rangeEnd, |
| int32_t *lengths, |
| int &count, |
| int limit ) const; |
| |
| /** |
| * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> |
| * |
| * @param status A status code recording the success of the call. |
| * @return A StringEnumeration that will iterate through the whole dictionary. |
| * The caller is responsible for closing it. The order is unspecified. |
| */ |
| virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| |
| /** |
| * <p>Return the size of the compact data.</p> |
| * |
| * @return The size of the dictionary's compact data. |
| */ |
| virtual uint32_t dataSize() const; |
| |
| /** |
| * <p>Return a void * pointer to the compact data, platform-endian.</p> |
| * |
| * @return The data for the compact dictionary, suitable for passing to the |
| * constructor. |
| */ |
| virtual const void *data() const; |
| |
| /** |
| * <p>Return a MutableTrieDictionary clone of this dictionary.</p> |
| * |
| * @param status A status code recording the success of the call. |
| * @return A MutableTrieDictionary with the same data as this dictionary |
| */ |
| virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const; |
| |
| private: |
| |
| /** |
| * <p>Convert a MutableTrieDictionary into a compact data blob.</p> |
| * |
| * @param dict The dictionary to convert. |
| * @param status A status code recording the success of the call. |
| * @return A single data blob starting with a CompactTrieHeader. |
| */ |
| static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict, |
| UErrorCode &status ); |
| |
| }; |
| |
| U_NAMESPACE_END |
| |
| /* TRIEDICT_H */ |
| #endif |