blob: d23e6d55db7e0afe149e5eb92085b02c360d2eee [file] [log] [blame]
/*
******************************************************************************
*
* Copyright (C) 2008-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: uspoof_conf.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009Jan05
* created by: Andy Heninger
*
* Internal classes for compiling confusable data into its binary (runtime) form.
*/
#ifndef __USPOOF_BUILDCONF_H__
#define __USPOOF_BUILDCONF_H__
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
#include "uspoof_impl.h"
U_NAMESPACE_BEGIN
// SPUString
// Holds a string that is the result of one of the mappings defined
// by the confusable mapping data (confusables.txt from Unicode.org).
// Instances of SPUString exist during the compilation process only.
//
// NOTE(review): fStr is a raw pointer and no copy constructor or assignment
// operator is declared here. Whether ~SPUString() deletes fStr is decided
// in the implementation file, which is not visible from this header -
// confirm the ownership rule before copying an SPUString.
struct SPUString : public UMemory {
UnicodeString *fStr; // The actual string.
int32_t fStrTableIndex; // Index into the final runtime data for this string.
// (or, for length 1, the single string char itself,
// there being no string table entry for it.)
// Construct from the string s. The constructor is not declared explicit,
// so a UnicodeString * converts implicitly to an SPUString.
SPUString(UnicodeString *s);
~SPUString();
};
// String Pool: a utility class for holding the strings that are the result of
// the spoof mappings. These strings will ultimately end up in the
// run-time String Table.
// This is sort of like a sorted set of strings, except that ICU's
// built-in collections don't support those, so it is implemented with a
// combination of a uhash (lookup by string) and a UVector (ordered access).
class SPUStringPool : public UMemory {
public:
// Create an empty pool.
// status is a UErrorCode reference through which failures are reported.
// NOTE(review): presumably fVec and fHash are allocated here - confirm
// in the implementation file.
SPUStringPool(UErrorCode &status);
~SPUStringPool();
// Add a string. Return the string from the table.
// If the input parameter string is already in the table, delete the
// input parameter and return the existing string.
// Because src may be deleted by this call, the caller must not use src
// afterwards; use the returned SPUString instead.
SPUString *addString(UnicodeString *src, UErrorCode &status);
// Get the n-th string in the collection.
// NOTE(review): behaviour for an out-of-range index is not specified
// in this header - check the implementation before relying on it.
SPUString *getByIndex(int32_t i);
// Sort the contents; affects the ordering of getByIndex().
// The sort order itself is defined in the implementation file.
void sort(UErrorCode &status);
// NOTE(review): presumably the number of strings currently in the pool - confirm.
int32_t size();
private:
UVector *fVec; // Elements are SPUString *
UHashtable *fHash; // Key: UnicodeString Value: SPUString
};
// class ConfusabledataBuilder
// An instance of this class exists while the confusable data is being built from source.
// It encapsulates the intermediate data structures that are used for building.
// It exports one static function, buildConfusableData(), to do a confusable data build.
// The constructor, destructor and all other members are private, so code
// outside the class cannot create or destroy a builder itself.
class ConfusabledataBuilder : public UMemory {
private:
SpoofImpl *fSpoofImpl; // The SpoofImpl passed to the constructor.
UChar *fInput; // NOTE(review): presumably the confusables source converted to UChars - confirm.
// The four mapping tables. They are the "table" argument of addKeyEntry().
// NOTE(review): the SL / SA / ML / MA prefixes presumably match the table
// names used in confusables.txt - confirm against the implementation.
UHashtable *fSLTable;
UHashtable *fSATable;
UHashtable *fMLTable;
UHashtable *fMATable;
UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
// The binary data is first assembled into the following four collections, then
// copied to its final raw-memory destination.
UVector *fKeyVec;
UVector *fValueVec;
UnicodeString *fStringTable;
UVector *fStringLengthsTable;
SPUStringPool *stringPool; // Pool of mapping result strings; see SPUStringPool.
// Regular expressions used while reading the input.
// NOTE(review): presumably one matches a whole input line and the other
// a hexadecimal number - confirm the patterns in the implementation.
URegularExpression *fParseLine;
URegularExpression *fParseHexNum;
int32_t fLineNum; // NOTE(review): presumably the input line number being processed - confirm.
ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
~ConfusabledataBuilder();
// Run the build on the confusables source text.
// confusables: the source text, as chars
// confusablesLen: length of confusables
// status: UErrorCode reference through which failures are reported
void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
// Add an entry to the key and value tables being built
// input: data from SLTable, MATable, etc.
// output: entry added to fKeyVec and fValueVec
void addKeyEntry(UChar32 keyChar, // The key character
UHashtable *table, // The table, one of SATable, MATable, etc.
int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
UErrorCode &status);
// From an index into fKeyVec & fValueVec
// get a UnicodeString with the corresponding mapping.
// The result is returned by value.
UnicodeString getMapping(int32_t key);
// Populate the final binary output data array with the compiled data.
void outputData(UErrorCode &status);
public:
// The single public entry point: do a confusable data build for spImpl
// from the confusables source text.
// spImpl: the SpoofImpl the build is done for
// confusables: the source text, as chars
// confusablesLen: length of confusables
// errorType, pe: output pointers
// NOTE(review): presumably these receive the kind and location of a
// parse error; whether NULL is accepted is not visible here - confirm.
// status: UErrorCode reference through which failures are reported
static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif // !UCONFIG_NO_NORMALIZATION
#endif // __USPOOF_BUILDCONF_H__