// blob: b8796617690f6864be9cdd43c54106321fce0e33
/**
*******************************************************************************
* Copyright (C) 2006, International Business Machines Corporation and others. *
* All Rights Reserved. *
*******************************************************************************
*/
#ifndef TRIEDICT_H
#define TRIEDICT_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
struct UEnumeration;
struct UDataSwapper;
struct UDataMemory;
/**
* <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
*
* <p>This function is declared at global scope, ahead of U_NAMESPACE_BEGIN.</p>
*
* @param ds Pointer to UDataSwapper containing global data about the
* transformation and function pointers for handling primitive
* types.
* @param inData Pointer to the input data to be transformed or examined.
* @param length Length of the data, counting bytes. May be -1 for preflighting.
* If length>=0, then transform the data.
* If length==-1, then only determine the length of the data.
* The length cannot be determined from the data itself for all
* types of data (e.g., not for simple arrays of integers).
* Only the cases length>=0 and length==-1 are described here.
* @param outData Pointer to the output data buffer.
* If length>=0 (transformation), then the output buffer must
* have a capacity of at least length.
* If length==-1, then outData will not be used and can be NULL.
* @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
* fulfill U_SUCCESS on input.
* @return The actual length of the data.
*
* @see UDataSwapper
*/
U_CAPI int32_t U_EXPORT2
triedict_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
class StringEnumeration;
struct CompactTrieHeader;
/*******************************************************************
* TrieWordDictionary
*/
/**
* <p>TrieWordDictionary is an abstract class that represents a word
* dictionary based on a trie. The base protocol is read-only.
* Subclasses may allow writing.</p>
*
* <p>Both matches() and openWords() are pure virtual, so this class cannot
* be instantiated directly; a concrete subclass must implement them.</p>
*/
class U_COMMON_API TrieWordDictionary : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
TrieWordDictionary();
/**
* <p>Virtual destructor.</p>
*/
virtual ~TrieWordDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* <p>There is no separate start-position parameter in this signature.
* NOTE(review): matching presumably begins at the current position of
* <code>text</code> -- confirm against the implementations.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit ) const = 0;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
};
/*******************************************************************
* MutableTrieDictionary
*/
struct TernaryNode; // Forward declaration; used below only as TernaryNode * and TernaryNode *&
/**
* <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
* added.</p>
*
* <p>It implements both pure virtual functions of TrieWordDictionary
* (matches() and openWords()) and adds addWord().</p>
*/
class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
private:
/**
* The root node of the trie
* @internal
*/
TernaryNode *fTrie;
/**
* A UText for internal use
* @internal
*/
UText *fIter;
// CompactTrieDictionary is granted access to the private members above.
friend class CompactTrieDictionary; // For fast conversion
public:
/**
* <p>Constructor.</p>
*
* @param median A UChar around which to balance the trie. Ideally, it should
* begin at least one word that is near the median of the set in the dictionary
* @param status A status code recording the success of the call.
*/
MutableTrieDictionary( UChar median, UErrorCode &status );
/**
* <p>Virtual destructor.</p>
*/
virtual ~MutableTrieDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit ) const;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const;
/**
* <p>Add one word to the dictionary.</p>
*
* @param word A UChar buffer containing the word.
* @param length The length of the word.
* @param status The resultant status
*/
virtual void addWord( const UChar *word,
int32_t length,
UErrorCode &status);
#if 0
/**
* <p>Add all strings from a UEnumeration to the dictionary.</p>
*
* <p>This declaration sits inside an <code>#if 0</code> block, so it is
* compiled out and is not part of the class.</p>
*
* @param words A UEnumeration that will return the desired words.
* @param status The resultant status
*/
virtual void addWords( UEnumeration *words, UErrorCode &status );
#endif
protected:
/**
* <p>Search the dictionary for matches.</p>
*
* <p>Takes the same first five parameters as matches(), plus two
* parameters passed by non-const reference.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @param parent The parent of the current node (reference to a node pointer)
* @param pMatched The returned parent node matched the input
* @return The number of characters in text that were matched.
*/
virtual int32_t search( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit,
TernaryNode *&parent,
UBool &pMatched ) const;
private:
/**
* <p>Private constructor. The root node is not allocated.</p>
*
* @param status A status code recording the success of the call.
*/
MutableTrieDictionary( UErrorCode &status );
};
/*******************************************************************
* CompactTrieDictionary
*/
/**
* <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
* to save space.</p>
*
* <p>It can be constructed from a UDataMemory, from raw saved data, or from
* a MutableTrieDictionary, and can be converted back with cloneMutable().</p>
*/
class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
private:
/**
* Pointer to the compact trie data, typed as its CompactTrieHeader.
*/
const CompactTrieHeader *fData;
/**
* A UBool indicating whether or not we own the fData.
*/
UBool fOwnData;
/**
* NOTE(review): presumably the UDataMemory adopted by the UDataMemory
* constructor -- confirm against the implementation.
*/
UDataMemory *fUData;
public:
/**
* <p>Construct a dictionary from a UDataMemory.</p>
*
* @param dataObj A pointer to a UDataMemory, which is adopted
* @param status A status code giving the result of the constructor
*/
CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
/**
* <p>Construct a dictionary from raw saved data.</p>
*
* @param dataObj A pointer to the raw data, which is still owned by the caller
* @param status A status code giving the result of the constructor
*/
CompactTrieDictionary(const void *dataObj, UErrorCode &status);
/**
* <p>Construct a dictionary from a MutableTrieDictionary.</p>
*
* @param dict The dictionary to use as input.
* @param status A status code recording the success of the call.
*/
CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
/**
* <p>Virtual destructor.</p>
*/
virtual ~CompactTrieDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param rangeEnd Named maxLength in TrieWordDictionary::matches(), where it
* is documented as the maximum number of code units to match.
* NOTE(review): confirm that the different name here does not imply
* different semantics.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t rangeEnd,
int32_t *lengths,
int &count,
int limit ) const;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const;
/**
* <p>Return the size of the compact data.</p>
*
* @return The size of the dictionary's compact data.
*/
virtual uint32_t dataSize() const;
/**
* <p>Return a void * pointer to the compact data, platform-endian.</p>
*
* @return The data for the compact dictionary, suitable for passing to the
* constructor.
*/
virtual const void *data() const;
/**
* <p>Return a MutableTrieDictionary clone of this dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A MutableTrieDictionary with the same data as this dictionary
*/
virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
private:
/**
* <p>Convert a MutableTrieDictionary into a compact data blob.</p>
*
* @param dict The dictionary to convert.
* @param status A status code recording the success of the call.
* @return A single data blob starting with a CompactTrieHeader.
*/
static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
UErrorCode &status );
};
U_NAMESPACE_END
/* TRIEDICT_H */
#endif