// blob: 51d956beee36b71c957d4dca198ac92c1a0f53bc [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
#define THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
#include <stddef.h>
#include <string>
#include <vector>
#include "base/macros.h"
#include "third_party/hunspell/google/bdict.h"
namespace hunspell {
// Forward declarations. BDictReader is named as a friend by the iterator
// classes below, and NodeReader is only used by const reference in
// WordIterator's declarations, so complete types are not needed here.
class BDictReader;
class NodeReader;
// Iterators -------------------------------------------------------------------
// Iterates through all words in the dictionary. It will fill the word into
// a caller-specified buffer.
class WordIterator {
 public:
  // The copy constructor, destructor and copy-assignment operator are all
  // declared explicitly and implemented in the .cc file. The stack_ member
  // stores NodeInfo, which is only forward-declared in this header, so these
  // must not be generated where the size of NodeInfo is unknown.
  WordIterator(const WordIterator& other);
  WordIterator& operator=(const WordIterator&);
  ~WordIterator();

  // Writes the next word into |output_buffer| (capacity |output_len|) and
  // that word's affixes into |affix_ids|. The return value is the number of
  // affixes written; 0 signals that there are no more words.
  int Advance(char* output_buffer,
              size_t output_len,
              int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);

 private:
  // Construction from a NodeReader is private; BDictReader is granted access
  // as a friend.
  friend class BDictReader;

  // Element type of stack_. Only declared here, not defined.
  struct NodeInfo;

  WordIterator(const NodeReader& reader);

  // Helper for Advance(): once a leaf has been found, this produces the word,
  // the affix list, and the value that Advance() returns.
  int FoundLeaf(const NodeReader& node,
                char cur_char,
                char* output_buffer,
                size_t output_len,
                int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);

  std::vector<NodeInfo> stack_;
};
// Will iterate over a list of lines separated by NULLs.
class LineIterator {
 public:
  // Yields the next word in the sequence, or NULL when there are no more.
  const char* Advance();

  // Moves on to the next word in the sequence and copies it into |buf|, whose
  // capacity is |buf_len|. A word that does not fit is truncated. Returns
  // true on success.
  bool AdvanceAndCopy(char* buf, size_t buf_len);

  // True once all data has been read: either a double-NULL was reached or
  // the end of the input was hit (the latter shouldn't happen).
  bool IsDone() const;

 protected:
  // Construction is restricted to subclasses and to BDictReader.
  friend class BDictReader;

  LineIterator(const unsigned char* bdict_data,
               size_t bdict_length,
               size_t first_offset);

  const unsigned char* bdict_data_;
  size_t bdict_length_;

  // Offset into bdict_data_ of the next string to be read.
  size_t cur_offset_;
};
// Created by GetReplacementIterator to iterate over all replacement pairs.
class ReplacementIterator : public LineIterator {
 public:
  // Retrieves the next replacement pair. On success |*first| and |*second|
  // are set to point at NULL-terminated strings. Once the pairs are exhausted
  // nothing is filled in and false is returned.
  bool GetNext(const char** first, const char** second);

 private:
  // The constructor is private; BDictReader is granted access as a friend.
  friend class BDictReader;

  // Passes all three arguments straight through to the LineIterator base.
  ReplacementIterator(const unsigned char* bdict_data,
                      size_t bdict_length,
                      size_t first_offset)
      : LineIterator(bdict_data, bdict_length, first_offset) {}
};
// Reads a BDict file mapped into memory.
class BDictReader {
 public:
  // A freshly constructed reader is not usable yet: Init() has to be called,
  // and has to succeed, before any other function.
  BDictReader();

  // Points the reader at |bdic_data| (|bdic_length| bytes). Ownership of the
  // data is not transferred; the caller must keep it valid until the reader
  // is destroyed. Returns true on success.
  bool Init(const unsigned char* bdic_data, size_t bdic_length);

  // Reports whether Init() succeeded, i.e. whether the other functions may be
  // called.
  bool IsValid() const { return bdict_data_ != NULL; }

  // Looks |word| up in the dictionary. A word can match more than once when
  // it is listed several times with different affix rules.
  //
  // Returns the number of matches and fills that many affix group IDs into
  // |affix_indices|. An ID of 0 means that particular match has no affix. A
  // return value of 0 means there were no matches.
  int FindWord(const char* word,
               int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

  // Iterator over all AF lines ("affix groups").
  LineIterator GetAfLineIterator() const;

  // Iterator over all SFX/PFX lines ("affix rules").
  LineIterator GetAffixLineIterator() const;

  // Iterator over all "other" lines.
  LineIterator GetOtherLineIterator() const;

  // Iterator over all replacements.
  ReplacementIterator GetReplacementIterator() const;

  // Test-only: iterator over every word in the dictionary.
  WordIterator GetAllWordIterator() const;

 private:
  // The dictionary data. Non-NULL indicates that Init() succeeded.
  const unsigned char* bdict_data_;
  size_t bdict_length_;

  // Not owned by this class; these point into the data and are NULL if the
  // data is invalid.
  const BDict::Header* header_;
  const BDict::AffHeader* aff_header_;

  DISALLOW_COPY_AND_ASSIGN(BDictReader);
};
} // namespace hunspell
#endif // THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_