/* blob: af27d6c563dc450db4615d60f369ae9d5af8c3a8 [file] [log] [blame] */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Hunspell, based on MySpell.
*
* The Initial Developers of the Original Code are
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
* Portions created by the Initial Developers are Copyright (C) 2002-2005
* the Initial Developers. All Rights Reserved.
*
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/*
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
* And Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef HASHMGR_HXX_
#define HASHMGR_HXX_
#include <stdio.h>
#include <string>
#include <vector>
#include "htypes.hxx"
#include "filemgr.hxx"
#include "w_char.hxx"
#ifdef HUNSPELL_CHROME_CLIENT
#include <map>
#include "base/stl_util.h"
#include "base/strings/string_piece.h"
#include "third_party/hunspell/google/bdict_reader.h"
#endif
// Flag representation modes; HashMgr keeps one of these in its flag_mode
// member. The numeric values are the same ones the compiler would assign
// implicitly (0..3); they are spelled out so that they stay fixed if the
// list is ever edited.
// NOTE(review): the per-enumerator meanings below are inferred from the
// names only -- confirm against the flag-decoding code.
enum flag {
  FLAG_CHAR = 0,  // presumably single-character flags
  FLAG_LONG = 1,  // presumably two-character ("long") flags
  FLAG_NUM = 2,   // presumably numeric flags
  FLAG_UNI = 3    // presumably Unicode flags
};
// Dictionary hash manager.
//
// Declares the storage for dictionary entries (hentry records) together with
// the flag-decoding and alias-table accessors listed below. Only declarations
// live in this header; remarks tagged NOTE(review) are inferred from names
// and must be checked against the implementation file.
//
// When HUNSPELL_CHROME_CLIENT is defined the manager is constructed from a
// hunspell::BDictReader; otherwise it is constructed from two file paths
// (tpath, apath) and an optional key.
//
// Objects of this class cannot be copied (see the private copy members at
// the end of the class).
class HashMgr {
#ifdef HUNSPELL_CHROME_CLIENT
  // Not owned by this class, owned by the Hunspell object.
  hunspell::BDictReader* bdict_reader;
  // NOTE(review): presumably maps a custom (user-added) word to its affix id;
  // the StringPiece keys do not own their characters -- confirm what keeps
  // the underlying storage alive (possibly pointer_to_strings_).
  std::map<base::StringPiece, int> custom_word_to_affix_id_map_;
  // NOTE(review): raw std::string pointers; ownership/cleanup is not visible
  // in this header -- confirm in the implementation.
  std::vector<std::string*> pointer_to_strings_;
#endif
  // Entry table and its size. NOTE(review): presumably the bucket array used
  // by lookup()/hash()/walk_hashtable() -- confirm.
  int tablesize;
  struct hentry** tableptr;
  // Flag representation mode (one of the `flag` enumerators).
  flag flag_mode;
  // Settings below are plain values; NOTE(review): presumably filled in by
  // load_config() from the affix data -- confirm.
  int complexprefixes;
  int utf8;
  unsigned short forbiddenword;
  int langnum;
  std::string enc;
  std::string lang;
  struct cs_info* csconv;
  std::string ignorechars;
  std::vector<w_char> ignorechars_utf16;
  int numaliasf;  // flag vector `compression' with aliases
  unsigned short** aliasf;
  unsigned short* aliasflen;
  int numaliasm;  // morphological description `compression' with aliases
  char** aliasm;

 public:
#ifdef HUNSPELL_CHROME_CLIENT
  // Builds a manager on top of |reader|, which is not owned (see
  // bdict_reader). Marked explicit so that a bare BDictReader pointer is
  // never silently converted into a HashMgr.
  explicit HashMgr(hunspell::BDictReader* reader);

  // Return the hentry corresponding to the given word. Returns NULL if the
  // word is not there in the cache.
  hentry* GetHentryFromHEntryCache(char* word);

  // Called before we do a new operation. This will empty the cache of pointers
  // to hentries that we have cached. In Chrome, we make these on-demand, but
  // they must live as long as the single spellcheck operation that they're part
  // of since Hunspell will save pointers to various ones as it works.
  //
  // This function allows that cache to be emptied and not grow infinitely.
  void EmptyHentryCache();
#else
  // Builds a manager from the files at |tpath| and |apath|; |key| is optional
  // and defaults to NULL.
  // NOTE(review): presumably |tpath| is the word list, |apath| the affix file
  // and |key| a decryption key (cf. load_tables()/load_config()) -- confirm.
  HashMgr(const char* tpath, const char* apath, const char* key = NULL);
#endif
  ~HashMgr();

  // Entry lookup and traversal.
  // NOTE(review): lookup() presumably yields NULL for an unknown word, and
  // walk_hashtable() presumably iterates entries using |col| as the cursor --
  // confirm in the implementation.
  struct hentry* lookup(const char*) const;
  int hash(const char*) const;
  struct hentry* walk_hashtable(int& col, struct hentry* hp) const;

  // Run-time dictionary modification.
  // NOTE(review): the meaning of the int results is not visible here.
  int add(const std::string& word);
  int add_with_affix(const std::string& word, const std::string& pattern);
  int remove(const std::string& word);

  // Flag conversion between textual and unsigned-short form. Two overloads of
  // decode_flags() exist: one writes through |result| as a raw array pointer,
  // the other fills a std::vector.
  // NOTE(review): ownership of the buffers returned through
  // `unsigned short**` and by encode_flag() is not visible here -- confirm
  // who frees them.
  int decode_flags(unsigned short** result,
                   const std::string& flags,
                   FileMgr* af) const;
  bool decode_flags(std::vector<unsigned short>& result,
                    const std::string& flags,
                    FileMgr* af) const;
  unsigned short decode_flag(const char* flag) const;
  char* encode_flag(unsigned short flag) const;

  // Accessors for the flag-alias (aliasf) and morphological-alias (aliasm)
  // tables.
  // NOTE(review): the index base and the pointer lifetime are defined by the
  // implementation.
  int is_aliasf() const;
  int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
  int is_aliasm() const;
  char* get_aliasm(int index) const;

 private:
  int get_clen_and_captype(const std::string& word, int* captype);
  int load_tables(const char* tpath, const char* key);
  int add_word(const std::string& word,
               int wcl,
               unsigned short* ap,
               int al,
               const std::string* desc,
               bool onlyupcase);
  int load_config(const char* affpath, const char* key);
  bool parse_aliasf(const std::string& line, FileMgr* af);
#ifdef HUNSPELL_CHROME_CLIENT
  // Loads the AF lines from a BDICT.
  // A BDICT file compresses its AF lines to save memory.
  // This function decompresses each AF line and calls parse_aliasf().
  int LoadAFLines();

  // Helper functions that create a new hentry struct, initialize it, and
  // delete it.
  // These functions encapsulate non-trivial operations in creating and
  // initializing a hentry struct from BDICT data to avoid changing code so much
  // even when a hentry struct is changed.
  hentry* InitHashEntry(hentry* entry,
                        size_t item_size,
                        const char* word,
                        int word_length,
                        int affix_index) const;
  hentry* CreateHashEntry(const char* word,
                          int word_length,
                          int affix_index) const;
  void DeleteHashEntry(hentry* entry) const;

  // Converts the list of affix IDs to a linked list of hentry structures. The
  // hentry structures will point to the given word. The returned pointer will
  // be a statically allocated variable that will change for the next call. The
  // |word| buffer must be the same.
  hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const;

  // See EmptyHentryCache above. Note that each one is actually a linked list
  // followed by the homonym pointer.
  typedef std::map<std::string, hentry*> HEntryCache;
  HEntryCache hentry_cache;
#endif
  int add_hidden_capitalized_word(const std::string& word,
                                  int wcl,
                                  unsigned short* flags,
                                  int al,
                                  const std::string* dp,
                                  int captype);
  bool parse_aliasm(const std::string& line, FileMgr* af);
  int remove_forbidden_flag(const std::string& word);

  // Copying is disabled. The class has a user-declared destructor and holds
  // raw pointer tables (tableptr, aliasf, aliasflen, aliasm), so a
  // member-wise copy would leave two objects pointing at the same tables
  // (and, if the destructor releases them, release them twice). These two
  // members are declared private and intentionally left undefined, so any
  // attempt to copy fails at compile or link time.
  HashMgr(const HashMgr&);
  HashMgr& operator=(const HashMgr&);
};
#endif