chromium/src/third_party/hunspell/src/hunspell/hunspell.cxx - manifest_repos/chromium_src - Git at Google

 /* ***** BEGIN LICENSE BLOCK *****
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  *
  * The contents of this file are subject to the Mozilla Public License Version
  * 1.1 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  * http://www.mozilla.org/MPL/
  *
  * Software distributed under the License is distributed on an "AS IS" basis,
  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  * for the specific language governing rights and limitations under the
  * License.
  *
  * The Original Code is Hunspell, based on MySpell.
  *
  * The Initial Developers of the Original Code are
  * Kevin Hendricks (MySpell) and Németh László (Hunspell).
  * Portions created by the Initial Developers are Copyright (C) 2002-2005
  * the Initial Developers. All Rights Reserved.
  *
  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 /*
  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
  * And Contributors.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * 3. All modifications to the source code must be clearly marked as
  *    such.  Binary redistributions based on modified source code
  *    must be clearly marked as modified versions in the documentation
  *    and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>

 #include "affixmgr.hxx"
 #include "hunspell.hxx"
 #include "suggestmgr.hxx"
 #include "hunspell.h"
 #ifndef HUNSPELL_CHROME_CLIENT
 #    include "config.h"
 #endif
 #include "csutil.hxx"

 #include <limits>
 #include <string>

 // Size limit applied to input words when the dictionary is UTF-8 encoded:
 // twice MAXWORDLEN. spell() rejects words whose byte length reaches it.
 #define MAXWORDUTF8LEN (MAXWORDLEN * 2)

 // Private implementation object behind the public Hunspell class (the
 // Hunspell constructor allocates one and the public methods forward to it).
 // It owns the affix manager, the hash manager(s) and the suggestion manager.
 // Copying is disabled (copy constructor / assignment are declared private
 // and left without a definition here).
 class HunspellImpl
 {
 public:
 #ifdef HUNSPELL_CHROME_CLIENT
   // Chrome build: the dictionary comes from an in-memory BDict buffer.
   HunspellImpl(const unsigned char* bdict_data, size_t bdict_length);
 #else
   // Stock build: the dictionary comes from an affix file and a .dic file.
   HunspellImpl(const char* affpath, const char* dpath, const char* key);
 #endif
   ~HunspellImpl();
 #ifndef HUNSPELL_CHROME_CLIENT
   // Loads an additional dictionary; returns 0 on success, 1 when no affix
   // path is stored.
   int add_dic(const char* dpath, const char* key);
 #endif
   std::vector<std::string> suffix_suggest(const std::string& root_word);
   std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
   std::vector<std::string> generate(const std::string& word, const std::string& pattern);
   std::vector<std::string> stem(const std::string& word);
   std::vector<std::string> stem(const std::vector<std::string>& morph);
   std::vector<std::string> analyze(const std::string& word);
   int get_langnum() const;
   bool input_conv(const std::string& word, std::string& dest);
   // Spell-checks `word`; optional `info` receives SPELL_* flags and optional
   // `root` receives the root form found by affix/compound analysis.
   bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
   std::vector<std::string> suggest(const std::string& word);
   const std::string& get_wordchars() const;
   const std::vector<w_char>& get_wordchars_utf16() const;
   const std::string& get_dict_encoding() const;
   int add(const std::string& word);
   int add_with_affix(const std::string& word, const std::string& example);
   int remove(const std::string& word);
   const std::string& get_version() const;
   struct cs_info* get_csconv();
   // NUL-terminated copy of `encoding`, filled in by the constructor.
   std::vector<char> dic_encoding_vec;

 private:
   AffixMgr* pAMgr;               // affix manager, created in the constructor
   std::vector<HashMgr*> m_HMgrs; // hash managers; element 0 is the main dictionary
   SuggestMgr* pSMgr;             // suggestion manager, created in the constructor
 #ifndef HUNSPELL_CHROME_CLIENT // We are using BDict instead.
   char* affixpath;               // copy of the affix path, reused by add_dic()
 #endif
   std::string encoding;          // dictionary encoding reported by the affix manager
   struct cs_info* csconv;        // case table for 8-bit encodings; NULL when utf8 is set
   int langnum;                   // language number reported by the affix manager
   int utf8;                      // non-zero when the dictionary is UTF-8 encoded
   int complexprefixes;           // non-zero: words are reversed before lookup
   std::vector<std::string> wordbreak; // break patterns used by spell()

 #ifdef HUNSPELL_CHROME_CLIENT
   // Not owned by us, owned by the Hunspell object.
   // NOTE(review): the HunspellImpl constructor allocates this reader and the
   // destructor deletes it, which contradicts the line above -- confirm.
   hunspell::BDictReader* bdict_reader;
 #endif

 private:
   // Input normalisation helpers: trim blanks/periods and classify the
   // capitalisation of a word.
   void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
   size_t cleanword2(std::string& dest,
                     std::vector<w_char>& dest_u,
                     const std::string& src,
                     int* pcaptype,
                     size_t* pabbrev);
   // Case conversion helpers; the "2" variants keep an 8-bit/UTF-8 string and
   // its UTF-16 form in step.
   void mkinitcap(std::string& u8);
   int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
   int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
   void mkallcap(std::string& u8);
   int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
   // Single dictionary/affix/compound lookup of an already normalised word.
   struct hentry* checkword(const std::string& source, int* info, std::string* root);
   std::string sharps_u8_l1(const std::string& source);
   // Recursive search over "ss" <-> sharp s substitutions.
   hentry*
   spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
   int is_keepcase(const hentry* rv);
   void insert_sug(std::vector<std::string>& slst, const std::string& word);
   void cat_result(std::string& result, const std::string& st);
   // Helpers for the XML flavour of the simplified API.
   std::vector<std::string> spellml(const std::string& word);
   std::string get_xml_par(const char* par);
   const char* get_xml_pos(const char* s, const char* attr);
   std::vector<std::string> get_xml_list(const char* list, const char* tag);
   int check_xml_par(const char* q, const char* attr, const char* value);
 private:
   // Declared but not defined: copying a HunspellImpl is not supported.
   HunspellImpl(const HunspellImpl&);
   HunspellImpl& operator=(const HunspellImpl&);
 };

 // Public constructor: forwards its arguments to a freshly allocated
 // HunspellImpl stored in m_Impl. The Chrome build takes an in-memory BDict
 // buffer; the stock build takes affix/dictionary paths and a key.
 #ifdef HUNSPELL_CHROME_CLIENT
 Hunspell::Hunspell(const unsigned char* bdict_data, size_t bdict_length)
   : m_Impl(new HunspellImpl(bdict_data, bdict_length)) {
 #else
 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
   : m_Impl(new HunspellImpl(affpath, dpath, key)) {
 #endif
 }

 // Creates the hash manager, the affix manager and the suggestion manager (in
 // that order) and caches the dictionary-wide settings reported by the affix
 // manager: encoding, language number, UTF-8 flag, complex-prefix flag and
 // the word-break table.
 #ifdef HUNSPELL_CHROME_CLIENT
 HunspellImpl::HunspellImpl(const unsigned char* bdict_data, size_t bdict_length) {
 #else
 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
 #endif
   csconv = NULL;
   utf8 = 0;
   complexprefixes = 0;

 #ifdef HUNSPELL_CHROME_CLIENT
   // Chrome build: every manager reads from the same BDict reader.
   bdict_reader = new hunspell::BDictReader;
   bdict_reader->Init(bdict_data, bdict_length);

   // The hash manager has to exist before the affix manager, which is handed
   // the hash manager list.
   m_HMgrs.push_back(new HashMgr(bdict_reader));
   pAMgr = new AffixMgr(bdict_reader, m_HMgrs); // TODO: 'key' ?
 #else
   // Stock build: keep a private copy of the affix path for add_dic().
   affixpath = mystrdup(affpath);

   // The hash manager has to exist before the affix manager, which is handed
   // the hash manager list.
   m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
   pAMgr = new AffixMgr(affpath, m_HMgrs, key);
 #endif

   // Pull the try string and the dictionary-wide settings out of the affix
   // manager.
   char* try_chars = pAMgr->get_try_string();
   encoding = pAMgr->get_encoding();
   langnum = pAMgr->get_langnum();
   utf8 = pAMgr->get_utf8();
   if (!utf8) {
     // Only 8-bit encodings use a case-conversion table.
     csconv = get_current_cs(encoding);
   }
   complexprefixes = pAMgr->get_complexprefixes();
   wordbreak = pAMgr->get_breaktable();

   // Keep a NUL-terminated copy of the encoding name in dic_encoding_vec.
   dic_encoding_vec.resize(encoding.size() + 1);
   strcpy(&dic_encoding_vec.front(), encoding.c_str());

   // The suggestion manager is built last; it receives the affix manager.
 #ifdef HUNSPELL_CHROME_CLIENT
   pSMgr = new SuggestMgr(bdict_reader, try_chars, MAXSUGGESTION, pAMgr);
 #else
   pSMgr = new SuggestMgr(try_chars, MAXSUGGESTION, pAMgr);
 #endif

   // The try string is released here; free(NULL) is a no-op, so no guard is
   // needed.
   free(try_chars);
 }

 // Releases the implementation object allocated by the constructor.
 Hunspell::~Hunspell() {
   delete m_Impl;
 }

 // Tears the object down: suggestion manager first, then the affix manager,
 // then every hash manager, and finally the build-specific resources (BDict
 // reader in the Chrome build, affix path copy otherwise).
 HunspellImpl::~HunspellImpl() {
   delete pSMgr;
   delete pAMgr;
   for (std::vector<HashMgr*>::iterator it = m_HMgrs.begin();
        it != m_HMgrs.end(); ++it) {
     delete *it;
   }
   pSMgr = NULL;
   pAMgr = NULL;

 #ifdef MOZILLA_CLIENT
   // Only the Mozilla build releases the case-conversion table here.
   delete[] csconv;
 #endif
   csconv = NULL;

 #ifdef HUNSPELL_CHROME_CLIENT
   // Deleting a null pointer is a no-op, so no explicit guard is required.
   delete bdict_reader;
   bdict_reader = NULL;
 #else
   // free(NULL) is a no-op, so no explicit guard is required.
   free(affixpath);
   affixpath = NULL;
 #endif
 }

 #ifndef HUNSPELL_CHROME_CLIENT
 // Load an extra dictionary: public entry point that forwards to
 // HunspellImpl::add_dic() and returns its result unchanged.
 int Hunspell::add_dic(const char* dpath, const char* key) {
   return m_Impl->add_dic(dpath, key);
 }

 // load extra dictionaries
 int HunspellImpl::add_dic(const char* dpath, const char* key) {
   if (!affixpath)
     return 1;
   m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
   return 0;
 }
 #endif

 // Copies src into dest without its leading blanks and trailing periods.
 // The number of removed periods is stored in *pabbrev and the
 // capitalization type of the remaining text in *pcaptype. For UTF-8
 // dictionaries dest_utf additionally receives the UTF-16 form of dest.
 // Returns the byte length of the cleaned word (0 when nothing is left, in
 // which case dest and dest_utf are empty and *pcaptype is NOCAP).

 size_t HunspellImpl::cleanword2(std::string& dest,
                          std::vector<w_char>& dest_utf,
                          const std::string& src,
                          int* pcaptype,
                          size_t* pabbrev) {
   dest.clear();
   dest_utf.clear();

   // [first, last) delimits the part of src that survives the trimming.
   const char* first = src.c_str();
   while (*first == ' ')
     ++first;

   const char* last = first + strlen(first);
   size_t periods = 0;
   while ((last != first) && (last[-1] == '.')) {
     --last;
     ++periods;
   }
   *pabbrev = periods;

   // an empty remainder cannot carry any capitalization
   if (last == first) {
     *pcaptype = NOCAP;
     return 0;
   }

   dest.assign(first, last);
   if (!utf8) {
     *pcaptype = get_captype(dest, csconv);
   } else {
     u8_u16(dest_utf, dest);
     *pcaptype = get_captype_utf8(dest_utf, langnum);
   }
   return dest.size();
 }

 // Copies src into dest without its leading blanks and trailing periods,
 // stores the number of removed periods in *pabbrev and classifies the
 // capitalization of the remaining text into *pcaptype (NOCAP, INITCAP,
 // ALLCAP, HUHINITCAP or HUHCAP).
 //
 // When nothing is left after trimming, dest stays empty and *pcaptype is
 // NOCAP.
 //
 // The UTF-8 branch works on the trimmed text and counts its characters, so
 // that it agrees with the 8-bit branch and with cleanword2(): previously it
 // converted the untrimmed src and left the character count at zero, which
 // made ALLCAP unreachable for UTF-8 dictionaries.
 void HunspellImpl::cleanword(std::string& dest,
                         const std::string& src,
                         int* pcaptype,
                         int* pabbrev) {
   dest.clear();
   const unsigned char* q = (const unsigned char*)src.c_str();
   int firstcap = 0;

   // first skip over any leading blanks
   while ((*q != '\0') && (*q == ' '))
     q++;

   // now strip off any trailing periods (recording their presence)
   *pabbrev = 0;
   int nl = strlen((const char*)q);
   while ((nl > 0) && (*(q + nl - 1) == '.')) {
     nl--;
     (*pabbrev)++;
   }

   // if no characters are left it can't be capitalized
   if (nl <= 0) {
     *pcaptype = NOCAP;
     return;
   }

   // now determine the capitalization type of the first nl letters
   int ncap = 0;      // characters that differ from their lower-case form
   int nneutral = 0;  // characters without case (upper == lower)
   int nc = 0;        // total number of characters examined

   if (!utf8) {
     while (nl > 0) {
       nc++;
       if (csconv[(*q)].ccase)
         ncap++;
       if (csconv[(*q)].cupper == csconv[(*q)].clower)
         nneutral++;
       dest.push_back(*q++);
       nl--;
     }
     firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
   } else {
     // Convert only the trimmed span [q, q + nl), not the whole of src.
     std::vector<w_char> t;
     u8_u16(t, std::string((const char*)q, nl));
     nc = static_cast<int>(t.size());
     for (size_t i = 0; i < t.size(); ++i) {
       unsigned short idx = (t[i].h << 8) + t[i].l;
       unsigned short low = unicodetolower(idx, langnum);
       if (idx != low)
         ncap++;
       if (unicodetoupper(idx, langnum) == low)
         nneutral++;
     }
     u16_u8(dest, t);
     // ncap > 0 implies t is not empty, so t[0] is safe to read
     if (ncap) {
       unsigned short idx = (t[0].h << 8) + t[0].l;
       firstcap = (idx != unicodetolower(idx, langnum));
     }
   }

   // now finally set the captype
   if (ncap == 0) {
     *pcaptype = NOCAP;
   } else if ((ncap == 1) && firstcap) {
     *pcaptype = INITCAP;
   } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
     *pcaptype = ALLCAP;
   } else if ((ncap > 1) && firstcap) {
     *pcaptype = HUHINITCAP;
   } else {
     *pcaptype = HUHCAP;
   }
 }

 // Upper-cases u8 in place. 8-bit dictionaries use the csconv table; UTF-8
 // dictionaries make a round trip through UTF-16.
 void HunspellImpl::mkallcap(std::string& u8) {
   if (!utf8) {
     ::mkallcap(u8, csconv);
     return;
   }

   std::vector<w_char> wide;
   u8_u16(wide, u8);
   ::mkallcap_utf(wide, langnum);
   u16_u8(u8, wide);
 }

 // Lower-cases a word in place and returns the resulting byte length of u8.
 // For UTF-8 dictionaries the UTF-16 form u16 is lower-cased and u8 is
 // regenerated from it; otherwise only u8 is converted via csconv.
 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
   if (!utf8) {
     ::mkallsmall(u8, csconv);
     return u8.size();
   }

   ::mkallsmall_utf(u16, langnum);
   u16_u8(u8, u16);
   return u8.size();
 }

 // Returns a copy of source in which the UTF-8 byte pair for sharp s
 // (0xC3 0x9F) is replaced by the single Latin-1 byte 0xDF.
 std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
   std::string latin1 = source;
   mystrrep(latin1, "\xC3\x9F", "\xDF");
   return latin1;
 }

 // Recursive search over the "ss" / sharp-s spellings of base.
 //
 // Starting at n_pos, the next "ss" is first tried as a UTF-8 sharp s and
 // then left as "ss"; n counts the occurrences handled so far (bounded by
 // MAXSHARPS) and repnum counts how many of them were actually replaced.
 // When no further "ss" can be handled, the candidate is looked up with
 // checkword() -- but only if at least one replacement took place. Returns
 // the first dictionary entry found, or NULL.
 hentry* HunspellImpl::spellsharps(std::string& base,
                               size_t n_pos,
                               int n,
                               int repnum,
                               int* info,
                               std::string* root) {
   const size_t hit = base.find("ss", n_pos);
   const bool can_branch = (hit != std::string::npos) && (n < MAXSHARPS);

   if (!can_branch) {
     // leaf: a candidate without any replacement is not worth checking
     if (repnum <= 0)
       return NULL;
     if (utf8)
       return checkword(base, info, root);
     // 8-bit dictionaries expect the Latin-1 sharp s
     std::string latin1(sharps_u8_l1(base));
     return checkword(latin1, info, root);
   }

   // first alternative: this "ss" becomes a sharp s
   base[hit] = '\xC3';
   base[hit + 1] = '\x9F';
   hentry* found = spellsharps(base, hit + 2, n + 1, repnum + 1, info, root);
   if (found)
     return found;

   // second alternative: restore "ss" and carry on behind it
   base[hit] = 's';
   base[hit + 1] = 's';
   return spellsharps(base, hit + 2, n + 1, repnum, info, root);
 }

 // Returns 1 when entry rv carries the dictionary's KEEPCASE flag, 0
 // otherwise (also when there is no affix manager, the entry has no flags, or
 // no KEEPCASE flag is defined).
 int HunspellImpl::is_keepcase(const hentry* rv) {
   if (!pAMgr || !rv->astr)
     return 0;
   if (!pAMgr->get_keepcase())
     return 0;
   return TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen) ? 1 : 0;
 }

 /* Insert a copy of `word` at the front of the suggestion list `slst`;
  * the existing suggestions keep their relative order behind it. */
 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
   slst.insert(slst.begin(), word);
 }

 // Public spell-check entry point: forwards to HunspellImpl::spell() and
 // returns its result unchanged.
 bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
   return m_Impl->spell(word, info, root);
 }

 // Spell-checks `word`.
 //
 // Parameters:
 //   word - the word to check, in the dictionary encoding.
 //   info - optional; reset to 0 and then filled with SPELL_* bit flags
 //          (SPELL_ORIGCAP, SPELL_INITCAP, SPELL_WARN and whatever
 //          checkword() reports).
 //   root - optional; cleared and then handed to checkword(), which stores
 //          the root of an affixed or compound match in it.
 //
 // Returns true when the word is accepted: it equals SPELL_XML, is empty after
 // cleaning, is a number with single separators, is found by checkword() in
 // one of the capitalization variants tried below, or can be split at the
 // dictionary's break patterns into accepted parts. Over-long words and words
 // with 10 or more break points are rejected.
 //
 // The SPELL_* values are combined with |= / &= ~ rather than += / -=:
 // the ALLCAP case falls through into INITCAP and would otherwise add
 // SPELL_ORIGCAP twice, corrupting the neighbouring bits.
 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
 #ifdef HUNSPELL_CHROME_CLIENT
   // guard the index: the emptiness of m_HMgrs is only checked further down
   if (!m_HMgrs.empty() && m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();
 #endif
   struct hentry* rv = NULL;

   // let the rest of the function write flags unconditionally
   int info2 = 0;
   if (!info)
     info = &info2;
   else
     *info = 0;

   // Hunspell supports XML input of the simplified API (see manual)
   if (word == SPELL_XML)
     return true;
   if (utf8) {
     if (word.size() >= MAXWORDUTF8LEN)
       return false;
   } else {
     if (word.size() >= MAXWORDLEN)
       return false;
   }
   int captype = NOCAP;
   size_t abbv = 0;
   size_t wl = 0;

   std::string scw;
   std::vector<w_char> sunicw;

   // input conversion
   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
   {
     std::string wspace;

     bool convstatus = rl ? rl->conv(word, wspace) : false;
     if (convstatus)
       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
     else
       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
   }

 #ifdef MOZILLA_CLIENT
   // accept the abbreviated words without dots
   // workaround for the incomplete tokenization of Mozilla
   abbv = 1;
 #endif

   if (wl == 0 || m_HMgrs.empty())
     return true;
   if (root)
     root->clear();

   // allow numbers with dots, dashes and commas (but forbid double separators:
   // "..", "--" etc.)
   enum { NBEGIN, NNUM, NSEP };
   int nstate = NBEGIN;
   size_t i;

   for (i = 0; (i < wl); i++) {
     if ((scw[i] <= '9') && (scw[i] >= '0')) {
       nstate = NNUM;
     } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
       if ((nstate == NSEP) || (i == 0))
         break;
       nstate = NSEP;
     } else
       break;
   }
   if ((i == wl) && (nstate == NNUM))
     return true;

   switch (captype) {
     case HUHCAP:
     /* FALLTHROUGH */
     case HUHINITCAP:
       *info |= SPELL_ORIGCAP;
     /* FALLTHROUGH */
     case NOCAP:
       rv = checkword(scw, info, root);
       if ((abbv) && !(rv)) {
         // retry with one of the stripped periods put back
         std::string u8buffer(scw);
         u8buffer.push_back('.');
         rv = checkword(u8buffer, info, root);
       }
       break;
     case ALLCAP: {
       *info |= SPELL_ORIGCAP;
       rv = checkword(scw, info, root);
       if (rv)
         break;
       if (abbv) {
         std::string u8buffer(scw);
         u8buffer.push_back('.');
         rv = checkword(u8buffer, info, root);
         if (rv)
           break;
       }
       // Spec. prefix handling for Catalan, French, Italian:
       // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
       size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
       if (apos != std::string::npos) {
         mkallsmall2(scw, sunicw);
         //conversion may result in string with different len to pre-mkallsmall2
         //so re-scan
         if (apos != std::string::npos && apos < scw.size() - 1) {
           std::string part1 = scw.substr(0, apos+1);
           std::string part2 = scw.substr(apos+1);
           if (utf8) {
             std::vector<w_char> part1u, part2u;
             u8_u16(part1u, part1);
             u8_u16(part2u, part2);
             mkinitcap2(part2, part2u);
             scw = part1 + part2;
             sunicw = part1u;
             sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
             rv = checkword(scw, info, root);
             if (rv)
               break;
           } else {
             mkinitcap2(part2, sunicw);
             scw = part1 + part2;
             rv = checkword(scw, info, root);
             if (rv)
               break;
           }
           mkinitcap2(scw, sunicw);
           rv = checkword(scw, info, root);
           if (rv)
             break;
         }
       }
       if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {

         mkallsmall2(scw, sunicw);
         std::string u8buffer(scw);
         rv = spellsharps(u8buffer, 0, 0, 0, info, root);
         if (!rv) {
           mkinitcap2(scw, sunicw);
           rv = spellsharps(scw, 0, 0, 0, info, root);
         }
         if ((abbv) && !(rv)) {
           u8buffer.push_back('.');
           rv = spellsharps(u8buffer, 0, 0, 0, info, root);
           if (!rv) {
             u8buffer = std::string(scw);
             u8buffer.push_back('.');
             rv = spellsharps(u8buffer, 0, 0, 0, info, root);
           }
         }
         if (rv)
           break;
       }
     }
     // an all-caps word that was not accepted above is retried in its
     // lower-case and initial-capital forms
     /* FALLTHROUGH */
     case INITCAP: {

       *info |= SPELL_ORIGCAP;
       mkallsmall2(scw, sunicw);
       std::string u8buffer(scw);
       mkinitcap2(scw, sunicw);
       // SPELL_INITCAP is only raised for the duration of the lookup
       if (captype == INITCAP)
         *info |= SPELL_INITCAP;
       rv = checkword(scw, info, root);
       if (captype == INITCAP)
         *info &= ~SPELL_INITCAP;
       // forbid bad capitalization
       // (for example, ijs -> Ijs instead of IJs in Dutch)
       // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
       if (*info & SPELL_FORBIDDEN) {
         rv = NULL;
         break;
       }
       if (rv && is_keepcase(rv) && (captype == ALLCAP))
         rv = NULL;
       if (rv)
         break;

       rv = checkword(u8buffer, info, root);
       if (abbv && !rv) {
         u8buffer.push_back('.');
         rv = checkword(u8buffer, info, root);
         if (!rv) {
           u8buffer = scw;
           u8buffer.push_back('.');
           if (captype == INITCAP)
             *info |= SPELL_INITCAP;
           rv = checkword(u8buffer, info, root);
           if (captype == INITCAP)
             *info &= ~SPELL_INITCAP;
           if (rv && is_keepcase(rv) && (captype == ALLCAP))
             rv = NULL;
           break;
         }
       }
       if (rv && is_keepcase(rv) &&
           ((captype == ALLCAP) ||
            // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
            // in INITCAP form, too.
            !(pAMgr->get_checksharps() &&
              ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
               (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
         rv = NULL;
       break;
     }
   }

   if (rv) {
     if (pAMgr && pAMgr->get_warn() && rv->astr &&
         TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
       *info |= SPELL_WARN;
       if (pAMgr->get_forbidwarn())
         return false;
       return true;
     }
     return true;
   }

   // recursive breaking at break points
   if (!wordbreak.empty()) {

     int nbr = 0;
     wl = scw.size();

     // calculate break points for recursion limit
     for (size_t j = 0; j < wordbreak.size(); ++j) {
       size_t pos = 0;
       while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
         ++nbr;
         pos += wordbreak[j].size();
       }
     }
     if (nbr >= 10)
       return false;

     // check boundary patterns (^begin and end$)
     for (size_t j = 0; j < wordbreak.size(); ++j) {
       size_t plen = wordbreak[j].size();
       if (plen == 1 || plen > wl)
         continue;

       if (wordbreak[j][0] == '^' &&
           scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
         return true;

       if (wordbreak[j][plen - 1] == '$' &&
           scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
         // cut the pattern off, test the rest, then restore scw
         std::string suffix(scw.substr(wl - plen + 1));
         scw.resize(wl - plen + 1);
         if (spell(scw))
           return true;
         scw.append(suffix);
       }
     }

     // other patterns
     for (size_t j = 0; j < wordbreak.size(); ++j) {
       size_t plen = wordbreak[j].size();
       size_t found = scw.find(wordbreak[j]);
       if ((found > 0) && (found < wl - plen)) {
         if (!spell(scw.substr(found + plen)))
           continue;
         std::string suffix(scw.substr(found));
         scw.resize(found);
         // examine 2 sides of the break point
         if (spell(scw))
           return true;
         scw.append(suffix);

         // LANG_hu: spec. dash rule
         if (langnum == LANG_hu && wordbreak[j] == "-") {
           suffix = scw.substr(found + 1);
           scw.resize(found + 1);
           if (spell(scw))
             return true;  // check the first part with dash
           scw.append(suffix);
         }
         // end of LANG specific region
       }
     }
   }

   return false;
 }

 // Looks up one already normalised word.
 //
 // Steps: strip the dictionary's ignored characters (if any), reverse the word
 // for complex-prefix dictionaries, search every hash manager, then fall back
 // to affix stripping and finally to compound analysis.
 //
 // Parameters:
 //   w    - the word to look up.
 //   info - optional; SPELL_FORBIDDEN and SPELL_COMPOUND are OR-ed into it,
 //          and its SPELL_INITCAP bit is read to skip ONLYUPCASEFLAG entries.
 //   root - optional; receives the root word of an affixed or compound match
 //          (re-reversed for complex-prefix dictionaries).
 //
 // Returns the matching entry, or NULL when the word is empty, too long
 // (Chrome build), forbidden or not found.
 //
 // Flags are merged with |= rather than +=: spell() calls this function
 // several times with the same `info`, and adding an already-set flag again
 // would spill into the neighbouring bits.
 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
   bool usebuffer = false;
   std::string w2;
   const char* word;
   int len;

   const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
   if (ignoredchars != NULL) {
     w2.assign(w);
     if (utf8) {
       const std::vector<w_char>& ignoredchars_utf16 =
           pAMgr->get_ignore_utf16();
       remove_ignored_chars_utf(w2, ignoredchars_utf16);
     } else {
       remove_ignored_chars(w2, ignoredchars);
     }
     word = w2.c_str();
     len = w2.size();
     usebuffer = true;
   } else {
     word = w.c_str();
     len = w.size();
   }

   if (!len)
     return NULL;

 #ifdef HUNSPELL_CHROME_CLIENT
   // We need to check if the word length is valid to make coverity (Event
   // fixed_size_dest: Possible overrun of N byte fixed size buffer) happy.
   if ((utf8 && strlen(word) >= MAXWORDUTF8LEN) || (!utf8 && strlen(word) >= MAXWORDLEN))
     return NULL;
 #endif

   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     if (!usebuffer) {
       w2.assign(word);
       usebuffer = true;
     }
     if (utf8)
       reverseword_utf(w2);
     else
       reverseword(w2);
   }

   // w2 may have been modified above, so refresh the pointer
   if (usebuffer) {
     word = w2.c_str();
   }

   // look word in hash table
   struct hentry* he = NULL;
   for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
     he = m_HMgrs[i]->lookup(word);

     // check forbidden and onlyincompound words
     if ((he) && (he->astr) && (pAMgr) &&
         TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
       if (info)
         *info |= SPELL_FORBIDDEN;
       // LANG_hu section: set dash information for suggestions
       if (langnum == LANG_hu) {
         if (pAMgr->get_compoundflag() &&
             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
           if (info)
             *info |= SPELL_COMPOUND;
         }
       }
       return NULL;
     }

     // he = next not needaffix, onlyincompound homonym or onlyupcase word
     while (he && (he->astr) && pAMgr &&
            ((pAMgr->get_needaffix() &&
              TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
             (pAMgr->get_onlyincompound() &&
              TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
             (info && (*info & SPELL_INITCAP) &&
              TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
       he = he->next_homonym;
   }

   // check with affixes
   if (!he && pAMgr) {
     // try stripping off affixes */
     he = pAMgr->affix_check(word, len, 0);

     // check compound restriction and onlyupcase
     if (he && he->astr &&
         ((pAMgr->get_onlyincompound() &&
           TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
          (info && (*info & SPELL_INITCAP) &&
           TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
       he = NULL;
     }

     if (he) {
       if ((he->astr) && (pAMgr) &&
           TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
         if (info)
           *info |= SPELL_FORBIDDEN;
         return NULL;
       }
       if (root) {
         root->assign(he->word);
         if (complexprefixes) {
           if (utf8)
             reverseword_utf(*root);
           else
             reverseword(*root);
         }
       }
       // try check compound word
     } else if (pAMgr->get_compound()) {
       struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
       he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
       // LANG_hu section: `moving rule' with last dash
       if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
         std::string dup(word, len - 1);
         he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
       }
       // end of LANG specific region
       if (he) {
         if (root) {
           root->assign(he->word);
           if (complexprefixes) {
             if (utf8)
               reverseword_utf(*root);
             else
               reverseword(*root);
           }
         }
         if (info)
           *info |= SPELL_COMPOUND;
       }
     }
   }

   return he;
 }

 // Public suggestion entry point: forwards to HunspellImpl::suggest() and
 // returns its list unchanged.
 std::vector<std::string> Hunspell::suggest(const std::string& word) {
   return m_Impl->suggest(word);
 }

 std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
 #ifdef HUNSPELL_CHROME_CLIENT
   if (m_HMgrs[0]) m_HMgrs[0]->EmptyHentryCache();
 #endif
   std::vector<std::string> slst;

   int onlycmpdsug = 0;
   if (!pSMgr || m_HMgrs.empty())
     return slst;

   // process XML input of the simplified API (see manual)
   if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
     return spellml(word);
   }
   if (utf8) {
     if (word.size() >= MAXWORDUTF8LEN)
       return slst;
   } else {
     if (word.size() >= MAXWORDLEN)
       return slst;
   }
   int captype = NOCAP;
   size_t abbv = 0;
   size_t wl = 0;

   std::string scw;
   std::vector<w_char> sunicw;

   // input conversion
   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   {
     std::string wspace;

     bool convstatus = rl ? rl->conv(word, wspace) : false;
     if (convstatus)
       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
     else
       wl = cleanword2(scw, sunicw, word, &captype, &abbv);

     if (wl == 0)
       return slst;
   }

   int capwords = 0;

   // check capitalized form for FORCEUCASE
   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
     int info = SPELL_ORIGCAP;
     if (checkword(scw, &info, NULL)) {
       std::string form(scw);
       mkinitcap(form);
       slst.push_back(form);
       return slst;
     }
   }

   switch (captype) {
     case NOCAP: {
       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
       break;
     }

     case INITCAP: {
       capwords = 1;
       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
       std::string wspace(scw);
       mkallsmall2(wspace, sunicw);
       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       break;
     }
     case HUHINITCAP:
       capwords = 1;
     case HUHCAP: {
       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
       // something.The -> something. The
       size_t dot_pos = scw.find('.');
       if (dot_pos != std::string::npos) {
         std::string postdot = scw.substr(dot_pos + 1);
         int captype_;
         if (utf8) {
           std::vector<w_char> postdotu;
           u8_u16(postdotu, postdot);
           captype_ = get_captype_utf8(postdotu, langnum);
         } else {
           captype_ = get_captype(postdot, csconv);
         }
         if (captype_ == INITCAP) {
           std::string str(scw);
           str.insert(dot_pos + 1, 1, ' ');
           insert_sug(slst, str);
         }
       }

       std::string wspace;

       if (captype == HUHINITCAP) {
         // TheOpenOffice.org -> The OpenOffice.org
         wspace = scw;
         mkinitsmall2(wspace, sunicw);
         pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       }
       wspace = scw;
       mkallsmall2(wspace, sunicw);
       if (spell(wspace.c_str()))
         insert_sug(slst, wspace);
       size_t prevns = slst.size();
       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       if (captype == HUHINITCAP) {
         mkinitcap2(wspace, sunicw);
         if (spell(wspace.c_str()))
           insert_sug(slst, wspace);
         pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       }
       // aNew -> "a New" (instead of "a new")
       for (size_t j = prevns; j < slst.size(); ++j) {
         const char* space = strchr(slst[j].c_str(), ' ');
         if (space) {
           size_t slen = strlen(space + 1);
           // different case after space (need capitalisation)
           if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
             std::string first(slst[j].c_str(), space + 1);
             std::string second(space + 1);
             std::vector<w_char> w;
             if (utf8)
               u8_u16(w, second);
             mkinitcap2(second, w);
             // set as first suggestion
             slst.erase(slst.begin() + j);
             slst.insert(slst.begin(), first + second);
           }
         }
       }
       break;
     }

     case ALLCAP: {
       std::string wspace(scw);
       mkallsmall2(wspace, sunicw);
       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
         insert_sug(slst, wspace);
       mkinitcap2(wspace, sunicw);
       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
       for (size_t j = 0; j < slst.size(); ++j) {
         mkallcap(slst[j]);
         if (pAMgr && pAMgr->get_checksharps()) {
           if (utf8) {
             mystrrep(slst[j], "\xC3\x9F", "SS");
           } else {
             mystrrep(slst[j], "\xDF", "SS");
           }
         }
       }
       break;
     }
   }

   // LANG_hu section: replace '-' with ' ' in Hungarian
   if (langnum == LANG_hu) {
     for (size_t j = 0; j < slst.size(); ++j) {
       size_t pos = slst[j].find('-');
       if (pos != std::string::npos) {
         int info;
         std::string w(slst[j].substr(0, pos));
         w.append(slst[j].substr(pos + 1));
         (void)spell(w, &info, NULL);
         if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
           slst[j][pos] = ' ';
         } else
           slst[j][pos] = '-';
       }
     }
   }
   // END OF LANG_hu section

   // try ngram approach since found nothing or only compound words
   if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
     switch (captype) {
       case NOCAP: {
         pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
         break;
       }
       case HUHINITCAP:
         capwords = 1;
       case HUHCAP: {
         std::string wspace(scw);
         mkallsmall2(wspace, sunicw);
         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
         break;
       }
       case INITCAP: {
         capwords = 1;
         std::string wspace(scw);
         mkallsmall2(wspace, sunicw);
         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
         break;
       }
       case ALLCAP: {
         std::string wspace(scw);
         mkallsmall2(wspace, sunicw);
         size_t oldns = slst.size();
         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
         for (size_t j = oldns; j < slst.size(); ++j) {
           mkallcap(slst[j]);
         }
         break;
       }
     }
   }

   // try dash suggestion (Afo-American -> Afro-American)
   size_t dash_pos = scw.find('-');
   if (dash_pos != std::string::npos) {
     int nodashsug = 1;
     for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
       if (slst[j].find('-') != std::string::npos)
         nodashsug = 0;
     }

     size_t prev_pos = 0;
     bool last = false;

     while (nodashsug && !last) {
       if (dash_pos == scw.size())
         last = 1;
       std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
       if (!spell(chunk.c_str())) {
         std::vector<std::string> nlst = suggest(chunk.c_str());
         for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
           std::string wspace = scw.substr(0, prev_pos);
           wspace.append(*j);
           if (!last) {
             wspace.append("-");
             wspace.append(scw.substr(dash_pos + 1));
           }
           insert_sug(slst, wspace);
         }
         nodashsug = 0;
       }
       if (!last) {
         prev_pos = dash_pos + 1;
         dash_pos = scw.find('-', prev_pos);
       }
       if (dash_pos == std::string::npos)
         dash_pos = scw.size();
     }
   }

   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     for (size_t j = 0; j < slst.size(); ++j) {
       if (utf8)
         reverseword_utf(slst[j]);
       else
         reverseword(slst[j]);
     }
   }

   // capitalize
   if (capwords)
     for (size_t j = 0; j < slst.size(); ++j) {
       mkinitcap(slst[j]);
     }

   // expand suggestions with dot(s)
   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
     for (size_t j = 0; j < slst.size(); ++j) {
       slst[j].append(word.substr(word.size() - abbv));
     }
   }

   // remove bad capitalized and forbidden forms
   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
     switch (captype) {
       case INITCAP:
       case ALLCAP: {
         size_t l = 0;
         for (size_t j = 0; j < slst.size(); ++j) {
           if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
             std::string s;
             std::vector<w_char> w;
             if (utf8) {
               u8_u16(w, slst[j]);
             } else {
               s = slst[j];
             }
             mkallsmall2(s, w);
             if (spell(s)) {
               slst[l] = s;
               ++l;
             } else {
               mkinitcap2(s, w);
               if (spell(s)) {
                 slst[l] = s;
                 ++l;
               }
             }
           } else {
             slst[l] = slst[j];
             ++l;
           }
         }
         slst.resize(l);
       }
     }
   }

   // remove duplications
   size_t l = 0;
   for (size_t j = 0; j < slst.size(); ++j) {
     slst[l] = slst[j];
     for (size_t k = 0; k < l; ++k) {
       if (slst[k] == slst[j]) {
         --l;
         break;
       }
     }
     ++l;
   }
   slst.resize(l);

   // output conversion
   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
   for (size_t j = 0; rl && j < slst.size(); ++j) {
     std::string wspace;
     if (rl->conv(slst[j], wspace)) {
       slst[j] = wspace;
     }
   }

   return slst;
 }

 // Public facade: returns the dictionary encoding reported by the
 // implementation object.
 const std::string& Hunspell::get_dict_encoding() const {
   const std::string& enc = m_Impl->get_dict_encoding();
   return enc;
 }

 const std::string& HunspellImpl::get_dict_encoding() const {
   return encoding;
 }

 // Public facade: stems from morphological descriptions; forwards to
 // HunspellImpl::stem.
 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
   std::vector<std::string> stems = m_Impl->stem(desc);
   return stems;
 }

 // Builds the list of stems from a list of morphological descriptions.
 //
 // For every description the fields of all compound parts except the last one
 // are collected as a prefix. The remainder is split into its " | " separated
 // alternatives; each alternative contributes either generated forms (when it
 // carries a derivational suffix field) or its surface prefix + stem fields.
 // All candidates are accumulated as MSEP_REC separated records, split and
 // passed through uniqlist() before being returned.
 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
   std::vector<std::string> stems;
   if (desc.empty())
     return stems;

   // every candidate is appended here, each one preceded by MSEP_REC
   std::string records;

   for (size_t d = 0; d < desc.size(); ++d) {
     // fields of the compound word parts (except the last one)
     std::string compound_prefix;

     const char* tail = desc[d].c_str();
     const char* cur = strstr(tail, MORPH_PART);
     if (cur) {
       for (const char* next = strstr(cur + 1, MORPH_PART); next;
            next = strstr(cur + 1, MORPH_PART)) {
         std::string field;
         copy_field(field, cur, MORPH_PART);
         compound_prefix.append(field);
         cur = next;
       }
       // continue with the last part only
       tail = cur;
     }

     // turn every " | " into " <MSEP_ALT> " so line_tok can split on it
     std::string alternatives(tail);
     for (size_t pos = alternatives.find(" | "); pos != std::string::npos;
          pos = alternatives.find(" | ", pos)) {
       alternatives[pos + 1] = MSEP_ALT;
     }

     std::vector<std::string> analyses = line_tok(alternatives, MSEP_ALT);
     for (size_t k = 0; k < analyses.size(); ++k) {
       std::string& ana = analyses[k];

       if (ana.find(MORPH_DERI_SFX) == std::string::npos) {
         // no derivational suffix: prefix + optional surface prefix + stem
         records.push_back(MSEP_REC);
         records.append(compound_prefix);
         if (ana.find(MORPH_SURF_PFX) != std::string::npos) {
           std::string surface;
           copy_field(surface, ana, MORPH_SURF_PFX);
           records.append(surface);
         }
         std::string stem_field;
         copy_field(stem_field, ana, MORPH_STEM);
         records.append(stem_field);
         continue;
       }

       // derivational suffix present: drop the inflectional suffixes and
       // let the suggestion manager generate the forms
       const size_t infl = ana.find(MORPH_INFL_SFX);
       if (infl != std::string::npos)
         ana.resize(infl);
       std::vector<std::string> single(1, ana);
       std::string generated = pSMgr->suggest_gen(single, ana);
       if (generated.empty())
         continue;
       std::vector<std::string> forms = line_tok(generated, MSEP_REC);
       for (size_t j = 0; j < forms.size(); ++j) {
         records.push_back(MSEP_REC);
         records.append(compound_prefix);
         records.append(forms[j]);
       }
     }
   }

   stems = line_tok(records, MSEP_REC);
   uniqlist(stems);
   return stems;
 }

 // Public facade: stems of a word; forwards to HunspellImpl::stem.
 std::vector<std::string> Hunspell::stem(const std::string& word) {
   std::vector<std::string> stems = m_Impl->stem(word);
   return stems;
 }

 std::vector<std::string> HunspellImpl::stem(const std::string& word) {
   return stem(analyze(word));
 }

 // C-string view of the word characters string held by the implementation.
 const char* Hunspell::get_wordchars() const {
   const std::string& chars = m_Impl->get_wordchars();
   return chars.c_str();
 }

 // std::string flavour of get_wordchars(); forwards to the implementation.
 const std::string& Hunspell::get_wordchars_cpp() const {
   const std::string& chars = m_Impl->get_wordchars();
   return chars;
 }

 // Returns the word characters as reported by the affix manager.
 // Dereferences pAMgr without a null check.
 const std::string& HunspellImpl::get_wordchars() const {
   return (*pAMgr).get_wordchars();
 }

 // UTF-16 word characters; forwards to the implementation.
 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
   const std::vector<w_char>& chars = m_Impl->get_wordchars_utf16();
   return chars;
 }

 // Returns the UTF-16 word characters as reported by the affix manager.
 // Dereferences pAMgr without a null check.
 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
   return (*pAMgr).get_wordchars_utf16();
 }

 // Capitalises the first character of `u8` in place.
 // With a UTF-8 dictionary the string is converted to UTF-16, capitalised
 // with the language-aware helper and converted back; otherwise the
 // single-byte conversion table `csconv` is used.
 void HunspellImpl::mkinitcap(std::string& u8) {
   if (!utf8) {
     ::mkinitcap(u8, csconv);
     return;
   }
   std::vector<w_char> wide;
   u8_u16(wide, u8);
   ::mkinitcap_utf(wide, langnum);
   u16_u8(u8, wide);
 }

 // Capitalises the first character of a word held in two parallel forms.
 // UTF-8 mode: `u16` is capitalised and `u8` is rewritten from it with
 // u16_u8(). Otherwise only `u8` is modified, via `csconv`.
 // Returns the resulting size of `u8`.
 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
   if (!utf8) {
     ::mkinitcap(u8, csconv);
   } else {
     ::mkinitcap_utf(u16, langnum);
     u16_u8(u8, u16);
   }
   return u8.size();
 }

 // Lower-cases the first character of a word held in two parallel forms.
 // UTF-8 mode: `u16` is modified and `u8` is rewritten from it with
 // u16_u8(). Otherwise only `u8` is modified, via `csconv`.
 // Returns the resulting size of `u8`.
 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
   if (!utf8) {
     ::mkinitsmall(u8, csconv);
   } else {
     ::mkinitsmall_utf(u16, langnum);
     u16_u8(u8, u16);
   }
   return u8.size();
 }

 // Public facade: forwards to HunspellImpl::add.
 int Hunspell::add(const std::string& word) {
   const int status = m_Impl->add(word);
   return status;
 }

 // Adds `word` through the first hash manager (m_HMgrs[0]) and returns its
 // result; returns 0 when no hash manager is loaded.
 int HunspellImpl::add(const std::string& word) {
   if (m_HMgrs.empty())
     return 0;
   return m_HMgrs[0]->add(word);
 }

 // Public facade: forwards to HunspellImpl::add_with_affix.
 int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
   const int status = m_Impl->add_with_affix(word, example);
   return status;
 }

 // Adds `word` together with `example` through the first hash manager
 // (m_HMgrs[0]) and returns its result; returns 0 when no hash manager is
 // loaded.
 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
   if (m_HMgrs.empty())
     return 0;
   return m_HMgrs[0]->add_with_affix(word, example);
 }

 // Public facade: forwards to HunspellImpl::remove.
 int Hunspell::remove(const std::string& word) {
   const int status = m_Impl->remove(word);
   return status;
 }

 // Removes `word` through the first hash manager (m_HMgrs[0]) and returns its
 // result; returns 0 when no hash manager is loaded.
 int HunspellImpl::remove(const std::string& word) {
   if (m_HMgrs.empty())
     return 0;
   return m_HMgrs[0]->remove(word);
 }

 // C-string view of the version string held by the implementation.
 const char* Hunspell::get_version() const {
   const std::string& version = m_Impl->get_version();
   return version.c_str();
 }

 // std::string flavour of get_version(); forwards to the implementation.
 const std::string& Hunspell::get_version_cpp() const {
   const std::string& version = m_Impl->get_version();
   return version;
 }

 // Returns the version string as reported by the affix manager.
 // Dereferences pAMgr without a null check.
 const std::string& HunspellImpl::get_version() const {
   return (*pAMgr).get_version();
 }

 struct cs_info* HunspellImpl::get_csconv() {
   return csconv;
 }

 // Public facade: forwards to HunspellImpl::get_csconv.
 struct cs_info* Hunspell::get_csconv() {
   struct cs_info* table = m_Impl->get_csconv();
   return table;
 }

 // Appends `st` to `result`, inserting a newline between them when `result`
 // already has content. An empty `st` leaves `result` untouched.
 void HunspellImpl::cat_result(std::string& result, const std::string& st) {
   if (st.empty())
     return;
   if (!result.empty())
     result.push_back('\n');
   result.append(st);
 }

 // Public facade: morphological analysis of a word; forwards to
 // HunspellImpl::analyze.
 std::vector<std::string> Hunspell::analyze(const std::string& word) {
   std::vector<std::string> analyses = m_Impl->analyze(word);
   return analyses;
 }

 // Morphological analysis of `word`.
 //
 // Returns the analyses produced by SuggestMgr::suggest_morph for the cleaned
 // word and its capitalisation variants, split into one entry per MSEP_REC
 // separated record. An empty vector is returned when there is no suggestion
 // manager or dictionary, when the word is too long, when cleaning leaves
 // nothing to analyse, or when no analysis is found.
 //
 // Several branches are specific to Hungarian (langnum == LANG_hu): numbers
 // with separators / unit signs and words containing a dash.
 std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
   std::vector<std::string> slst;
   if (!pSMgr || m_HMgrs.empty())
     return slst;
   // refuse over-long input (the limit depends on the dictionary encoding)
   if (utf8) {
     if (word.size() >= MAXWORDUTF8LEN)
       return slst;
   } else {
     if (word.size() >= MAXWORDLEN)
       return slst;
   }
   int captype = NOCAP;
   size_t abbv = 0;
   size_t wl = 0;

   // cleaned word (scw) and the vector cleanword2 fills alongside it (sunicw)
   std::string scw;
   std::vector<w_char> sunicw;

   // input conversion
   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   {
     std::string wspace;

     // clean the converted word if the conversion table changed it,
     // otherwise clean the original
     bool convstatus = rl ? rl->conv(word, wspace) : false;
     if (convstatus)
       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
     else
       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
   }

   if (wl == 0) {
     if (abbv) {
       // nothing left but `abbv` was set: analyse a string of `abbv` dots
       scw.clear();
       for (wl = 0; wl < abbv; wl++)
         scw.push_back('.');
       abbv = 0;
     } else
       return slst;
   }

   // accumulated analyses, joined by cat_result()
   std::string result;

   size_t n = 0;
   // test numbers
   // LANG_hu section: set dash information for suggestions
   if (langnum == LANG_hu) {
     size_t n2 = 0;  // number of separators ('.' or ',') seen
     size_t n3 = 0;  // position of the last separator

     // scan the leading run of digits and separators
     while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
                         (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
       n++;
       if ((scw[n] == '.') || (scw[n] == ',')) {
         // stop on a first separator after more than 3 characters or on
         // two adjacent separators
         if (((n2 == 0) && (n > 3)) ||
             ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
           break;
         n2++;
         n3 = n;
       }
     }

     // whole word scanned, but more than 3 characters follow the last
     // separator: no analysis
     if ((n == wl) && (n3 > 0) && (n - n3 > 3))
       return slst;
     // whole word scanned, or the scan stopped at '%' / '\xB0' and the
     // remainder is a known word
     if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
                       checkword(scw.substr(n), NULL, NULL))) {
       result.append(scw);
       result.resize(n - 1);
       if (n == wl)
         cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
       else {
         std::string chunk = scw.substr(n - 1, 1);
         cat_result(result, pSMgr->suggest_morph(chunk));
         result.push_back('+');  // XXX SPEC. MORPHCODE
         cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
       }
       return line_tok(result, MSEP_REC);
     }
   }
   // END OF LANG_hu section

   // analyse the word in the forms that match its capitalisation type;
   // when `abbv` is set the same forms are also tried with a trailing dot
   switch (captype) {
     case HUHCAP:
     case HUHINITCAP:
     case NOCAP: {
       cat_result(result, pSMgr->suggest_morph(scw));
       if (abbv) {
         std::string u8buffer(scw);
         u8buffer.push_back('.');
         cat_result(result, pSMgr->suggest_morph(u8buffer));
       }
       break;
     }
     case INITCAP: {
       // u8buffer holds the all-lowercase form, scw the initial-capital form
       mkallsmall2(scw, sunicw);
       std::string u8buffer(scw);
       mkinitcap2(scw, sunicw);
       cat_result(result, pSMgr->suggest_morph(u8buffer));
       cat_result(result, pSMgr->suggest_morph(scw));
       if (abbv) {
         u8buffer.push_back('.');
         cat_result(result, pSMgr->suggest_morph(u8buffer));

         u8buffer = scw;
         u8buffer.push_back('.');

         cat_result(result, pSMgr->suggest_morph(u8buffer));
       }
       break;
     }
     case ALLCAP: {
       // first the word as cleaned, then its lowercase and
       // initial-capital forms
       cat_result(result, pSMgr->suggest_morph(scw));
       if (abbv) {
         std::string u8buffer(scw);
         u8buffer.push_back('.');
         cat_result(result, pSMgr->suggest_morph(u8buffer));
       }
       mkallsmall2(scw, sunicw);
       std::string u8buffer(scw);
       mkinitcap2(scw, sunicw);

       cat_result(result, pSMgr->suggest_morph(u8buffer));
       cat_result(result, pSMgr->suggest_morph(scw));
       if (abbv) {
         u8buffer.push_back('.');
         cat_result(result, pSMgr->suggest_morph(u8buffer));

         u8buffer = scw;
         u8buffer.push_back('.');

         cat_result(result, pSMgr->suggest_morph(u8buffer));
       }
       break;
     }
   }

   if (!result.empty()) {
     // word reversing wrapper for complex prefixes
     if (complexprefixes) {
       if (utf8)
         reverseword_utf(result);
       else
         reverseword(result);
     }
     return line_tok(result, MSEP_REC);
   }

   // compound word with dash (HU) I18n
   // LANG_hu section: set dash information for suggestions

   // the dash is only searched for Hungarian; other languages fall through
   // to the final return
   size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
   if (dash_pos != std::string::npos) {
     int nresult = 0;

     std::string part1 = scw.substr(0, dash_pos);
     std::string part2 = scw.substr(dash_pos+1);

     // examine 2 sides of the dash
     if (part2.empty()) {  // base word ending with dash
       if (spell(part1)) {
         std::string p = pSMgr->suggest_morph(part1);
         if (!p.empty()) {
           slst = line_tok(p, MSEP_REC);
           return slst;
         }
       }
     } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
       if (spell(part1) && (spell("-e"))) {
         std::string st = pSMgr->suggest_morph(part1);
         if (!st.empty()) {
           result.append(st);
         }
         result.push_back('+');  // XXX spec. separator in MORPHCODE
         st = pSMgr->suggest_morph("-e");
         if (!st.empty()) {
           result.append(st);
         }
         return line_tok(result, MSEP_REC);
       }
     } else {
       // first word ending with dash: word- XXX ???
       // part1 is spell-checked with a temporary trailing space
       part1.push_back(' ');
       nresult = spell(part1);
       part1.erase(part1.size() - 1);
       if (nresult && spell(part2) &&
           ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
         std::string st = pSMgr->suggest_morph(part1);
         if (!st.empty()) {
           result.append(st);
           result.push_back('+');  // XXX spec. separator in MORPHCODE
         }
         st = pSMgr->suggest_morph(part2);
         if (!st.empty()) {
           result.append(st);
         }
         return line_tok(result, MSEP_REC);
       }
     }
     // affixed number in correct word
     if (nresult && (dash_pos > 0) &&
         (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
          (scw[dash_pos - 1] == '.'))) {
       n = 1;
       if (scw[dash_pos - n] == '.')
         n++;
       // search first not a number character to left from dash
       while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
              (n < 6)) {
         n++;
       }
       // keep dash_pos - n inside the string
       if (dash_pos < n)
         n--;
       // numbers: valami1000000-hoz
       // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
       // 56-hoz, 6-hoz
       for (; n >= 1; n--) {
         // only candidates that start with a digit are checked
         if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
             continue;
         }
         std::string chunk = scw.substr(dash_pos - n);
         if (checkword(chunk, NULL, NULL)) {
           result.append(chunk);
           std::string st = pSMgr->suggest_morph(chunk);
           if (!st.empty()) {
             result.append(st);
           }
           return line_tok(result, MSEP_REC);
         }
       }
     }
   }
   // no analysis found
   return slst;
 }

 // Public facade: generation from morphological descriptions; forwards to
 // HunspellImpl::generate.
 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
   std::vector<std::string> forms = m_Impl->generate(word, pl);
   return forms;
 }

 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
   std::vector<std::string> slst;
   if (!pSMgr || pl.empty())
     return slst;
   std::vector<std::string> pl2 = analyze(word);
   int captype = NOCAP;
   int abbv = 0;
   std::string cw;
   cleanword(cw, word, &captype, &abbv);
   std::string result;

   for (size_t i = 0; i < pl.size(); ++i) {
     cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
   }

   if (!result.empty()) {
     // allcap
     if (captype == ALLCAP)
       mkallcap(result);

     // line split
     slst = line_tok(result, MSEP_REC);

     // capitalize
     if (captype == INITCAP || captype == HUHINITCAP) {
       for (size_t j = 0; j < slst.size(); ++j) {
         mkinitcap(slst[j]);
       }
     }

     // temporary filtering of prefix related errors (eg.
     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
     std::vector<std::string>::iterator it = slst.begin();
     while (it != slst.end()) {
       if (!spell(*it)) {
         it = slst.erase(it);
       } else  {
         ++it;
       }
     }
   }
   return slst;
 }

 // Public facade: generation by example word; forwards to
 // HunspellImpl::generate.
 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
   std::vector<std::string> forms = m_Impl->generate(word, pattern);
   return forms;
 }

 // Generates forms of `word` using the analyses of `pattern` as the
 // morphological descriptions, then passes the result through uniqlist().
 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
   std::vector<std::string> pattern_analysis = analyze(pattern);
   std::vector<std::string> forms = generate(word, pattern_analysis);
   uniqlist(forms);
   return forms;
 }

 // minimal XML parser functions
 // Extracts the value that starts at `par`.
 //
 // `par` must point at the opening delimiter of the value:
 //   - '>'  : element content, read up to the next '<';
 //   - '\'' or '"' : attribute value, read up to the matching quote.
 // Reading also stops at the end of the string. In the result "&lt;" is
 // replaced by "<" and then "&amp;" by "&".
 //
 // Returns an empty string when `par` is NULL or does not start with one of
 // the delimiters above (bad XML).
 std::string HunspellImpl::get_xml_par(const char* par) {
   std::string dest;
   if (!par)
     return dest;
   char end = *par;
   if (end == '>')
     end = '<';
   else if (end != '\'' && end != '"')
     // bad XML. Must not be `return 0`: that would construct the returned
     // std::string from a null pointer, which is undefined behaviour.
     return dest;
   for (par++; *par != '\0' && *par != end; ++par) {
     dest.push_back(*par);
   }
   mystrrep(dest, "&lt;", "<");
   mystrrep(dest, "&amp;", "&");
   return dest;
 }

 // Public facade: forwards to HunspellImpl::get_langnum.
 int Hunspell::get_langnum() const {
   const int lang = m_Impl->get_langnum();
   return lang;
 }

 int HunspellImpl::get_langnum() const {
   return langnum;
 }

 // Public facade: input conversion of `word` into `dest`; forwards to
 // HunspellImpl::input_conv.
 bool Hunspell::input_conv(const std::string& word, std::string& dest) {
   const bool converted = m_Impl->input_conv(word, dest);
   return converted;
 }

 // C-buffer flavour of input_conv().
 // Copies the converted word into `dest` and returns 1 only when a
 // conversion took place and the result (plus terminator) fits into
 // `destsize` bytes; otherwise `dest` is left untouched and 0 is returned.
 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
   std::string converted;
   if (!input_conv(word, converted))
     return 0;
   if (converted.size() >= destsize)
     return 0;
   strncpy(dest, converted.c_str(), destsize);
   return 1;
 }

 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
   if (rl) {
     return rl->conv(word, dest);
   }
   dest.assign(word);
   return false;
 }

 // Locates a position inside the XML tag that starts at `s`.
 //
 // attr == NULL: returns the position of the first '>' in `s` (NULL when
 //               there is none).
 // attr != NULL: searches, before that '>', for an occurrence of `attr`
 //               preceded by a space or a newline and returns the position
 //               just after it; returns NULL when there is no such
 //               occurrence or no '>'.
 const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
   const char* end = strchr(s, '>');
   if (attr == NULL)
     return end;
   if (end == NULL)
     return 0;  // unterminated tag: nothing can precede the closing '>'
   const char* p = s;
   for (;;) {
     p = strstr(p, attr);
     if (!p || p >= end)
       return 0;
     // `p > s` prevents reading the byte in front of the buffer when the
     // match is at its very beginning
     if (p > s && (p[-1] == ' ' || p[-1] == '\n'))
       break;
     // Not a standalone attribute name: resume after this match. Without
     // advancing, strstr() would return the same position forever.
     ++p;
   }
   return p + strlen(attr);
 }

 // Returns 1 when attribute `attr` of the tag at `q` has exactly the value
 // `value`, 0 otherwise.
 int HunspellImpl::check_xml_par(const char* q,
                             const char* attr,
                             const char* value) {
   const std::string found = get_xml_par(get_xml_pos(q, attr));
   return (found == value) ? 1 : 0;
 }

 // Collects the contents of the consecutive `tag` elements found in `list`
 // (e.g. tag "<a>" for "<a>x</a><a>y</a>").
 // Scanning stops at the first element with empty content. Returns an empty
 // vector when `list` is NULL.
 std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
   std::vector<std::string> items;
   if (!list)
     return items;
   const char* cursor = strstr(list, tag);
   while (cursor) {
     // the last character of `tag` is the delimiter get_xml_par() expects
     std::string item = get_xml_par(cursor + strlen(tag) - 1);
     if (item.empty())
       break;
     items.push_back(item);
     cursor = strstr(cursor + 1, tag);
   }
   return items;
 }

 // Handles an XML query of the form <query type="..."><word>...</word>...
 //
 // Supported types:
 //   analyze  - analyses the word; the result is a single string of the form
 //              <code><a>ana1</a><a>ana2</a></code>
 //   stem     - returns the stems of the word
 //   generate - generates forms of the first word, either by a second <word>
 //              used as example or by a <code><a>..</a></code> list of
 //              morphological descriptions
 // Returns an empty vector for malformed input, unknown types or when
 // nothing was found.
 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
   std::vector<std::string> out;

   const char* const xml = in_word.c_str();

   // locate the query tag and the first <word after its closing '>'
   const char* query = strstr(xml, "<query");
   if (!query)
     return out;  // bad XML input
   const char* first_word = strchr(query, '>');
   if (!first_word)
     return out;  // bad XML input
   first_word = strstr(first_word, "<word");
   if (!first_word)
     return out;  // bad XML input

   if (check_xml_par(query, "type=", "analyze")) {
     const std::string target = get_xml_par(strchr(first_word, '>'));
     if (!target.empty())
       out = analyze(target);
     if (out.empty())
       return out;
     // convert the result to <code><a>ana1</a><a>ana2</a></code> format
     std::string code("<code>");
     for (size_t i = 0; i < out.size(); ++i) {
       std::string item(out[i]);
       mystrrep(item, "\t", " ");
       mystrrep(item, "&", "&amp;");
       mystrrep(item, "<", "&lt;");

       code.append("<a>");
       code.append(item);
       code.append("</a>");
     }
     code.append("</code>");
     out.clear();
     out.push_back(code);
     return out;
   }

   if (check_xml_par(query, "type=", "stem")) {
     const std::string target = get_xml_par(strchr(first_word, '>'));
     if (!target.empty())
       return stem(target);
     return out;
   }

   if (check_xml_par(query, "type=", "generate")) {
     const std::string target = get_xml_par(strchr(first_word, '>'));
     if (target.empty())
       return out;

     // generation by example: a second <word> element
     const char* second_word = strstr(first_word + 1, "<word");
     if (second_word) {
       const std::string example = get_xml_par(strchr(second_word, '>'));
       if (!example.empty())
         return generate(target, example);
       return out;
     }

     // generation by description list: <code><a>..</a>...</code>
     const char* code = strstr(first_word + 1, "<code");
     if (code != NULL) {
       std::vector<std::string> descriptions = get_xml_list(strchr(code, '>'), "<a>");
       if (!descriptions.empty()) {
         out = generate(target, descriptions);
         uniqlist(out);
         return out;
       }
     }
   }

   return out;
 }

 // C-string flavour of spell().
 // When `root` is non-NULL, *root receives a mystrdup()'d copy of the root
 // reported by the implementation, or NULL when that root is empty.
 // Returns the implementation's boolean result as int.
 int Hunspell::spell(const char* word, int* info, char** root) {
   std::string root_buf;
   std::string* root_out = root ? &root_buf : NULL;
   const bool ok = m_Impl->spell(word, info, root_out);
   if (!root)
     return ok;
   if (root_buf.empty())
     *root = NULL;
   else
     *root = mystrdup(root_buf.c_str());
   return ok;
 }

 namespace {
   // Converts a vector of strings into a malloc()'ed array of mystrdup()'ed
   // C strings stored in *slst and returns the number of entries.
   // For an empty vector, or when the array allocation fails, *slst is NULL
   // and 0 is returned.
   int munge_vector(char*** slst, const std::vector<std::string>& items) {
     const size_t count = items.size();
     if (count == 0) {
       *slst = NULL;
       return 0;
     }
     char** array = (char**)malloc(sizeof(char*) * count);
     *slst = array;
     if (!array)
       return 0;
     for (size_t i = 0; i < count; ++i)
       array[i] = mystrdup(items[i].c_str());
     return count;
   }
 }

 void Hunspell::free_list(char*** slst, int n) {
   Hunspell_free_list((Hunhandle*)(this), slst, n);
 }

 int Hunspell::suggest(char*** slst, const char* word) {
   return Hunspell_suggest((Hunhandle*)(this), slst, word);
 }

 // C-array flavour of suffix_suggest(): the implementation's result is
 // converted with munge_vector(); returns the number of entries in *slst.
 int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
   return munge_vector(slst, m_Impl->suffix_suggest(root_word));
 }

 // Returns a pointer to the first element of the implementation's
 // dic_encoding_vec buffer.
 char* Hunspell::get_dic_encoding() {
   char& first = m_Impl->dic_encoding_vec[0];
   return &first;
 }

 int Hunspell::stem(char*** slst, char** desc, int n) {
   return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
 }

 int Hunspell::stem(char*** slst, const char* word) {
   return Hunspell_stem((Hunhandle*)(this), slst, word);
 }

 int Hunspell::analyze(char*** slst, const char* word) {
   return Hunspell_analyze((Hunhandle*)(this), slst, word);
 }

 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
   return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
 }

 int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
   return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
 }

 // C API: creates a Hunspell object from an affix and a dictionary path and
 // returns it as an opaque handle. Always returns NULL when built with
 // HUNSPELL_CHROME_CLIENT.
 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
 #ifndef HUNSPELL_CHROME_CLIENT
   Hunspell* const instance = new Hunspell(affpath, dpath);
   return reinterpret_cast<Hunhandle*>(instance);
 #else
   return NULL;
 #endif
 }

 // C API: like Hunspell_create(), additionally passing `key` to the Hunspell
 // constructor. Always returns NULL when built with HUNSPELL_CHROME_CLIENT.
 Hunhandle* Hunspell_create_key(const char* affpath,
                                const char* dpath,
                                const char* key) {
 #ifndef HUNSPELL_CHROME_CLIENT
   Hunspell* const instance = new Hunspell(affpath, dpath, key);
   return reinterpret_cast<Hunhandle*>(instance);
 #else
   return NULL;
 #endif
 }

 // C API: deletes the Hunspell object behind the handle.
 void Hunspell_destroy(Hunhandle* pHunspell) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   delete self;
 }

 #ifndef HUNSPELL_CHROME_CLIENT
 // C API: forwards to Hunspell::add_dic. Not compiled when
 // HUNSPELL_CHROME_CLIENT is defined.
 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->add_dic(dpath);
 }
 #endif

 // C API: spell-checks `word`; returns the result of Hunspell::spell as int.
 int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->spell(std::string(word));
 }

 // C API: forwards to Hunspell::get_dic_encoding.
 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->get_dic_encoding();
 }

 // C API: stores the suggestions for `word` in *slst (via munge_vector) and
 // returns their number.
 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->suggest(word));
 }

 // C API: stores the analyses of `word` in *slst (via munge_vector) and
 // returns their number.
 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->analyze(word));
 }

 // C API: stores the stems of `word` in *slst (via munge_vector) and returns
 // their number.
 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->stem(word));
 }

 // C API: stems from the first `n` morphological descriptions in `desc`.
 // The stems are stored in *slst (via munge_vector); returns their number.
 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
   std::vector<std::string> descriptions;
   char** cur = desc;
   for (int left = n; left > 0; --left, ++cur)
     descriptions.push_back(*cur);

   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->stem(descriptions));
 }

 // C API: generates forms of `word` by the example `pattern`. The forms are
 // stored in *slst (via munge_vector); returns their number.
 int Hunspell_generate(Hunhandle* pHunspell,
                       char*** slst,
                       const char* word,
                       const char* pattern) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->generate(word, pattern));
 }

 // C API: generates forms of `word` from the first `n` morphological
 // descriptions in `desc`. The forms are stored in *slst (via munge_vector);
 // returns their number.
 int Hunspell_generate2(Hunhandle* pHunspell,
                        char*** slst,
                        const char* word,
                        char** desc,
                        int n) {
   std::vector<std::string> descriptions;
   char** cur = desc;
   for (int left = n; left > 0; --left, ++cur)
     descriptions.push_back(*cur);

   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return munge_vector(slst, self->generate(word, descriptions));
 }

 /* functions for run-time modification of the dictionary */

 /* add word to the run-time dictionary */

 // C API: forwards to Hunspell::add.
 int Hunspell_add(Hunhandle* pHunspell, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->add(word);
 }

 /* add word to the run-time dictionary with affix flags of
  * the example (a dictionary word): Hunspell will recognize
  * affixed forms of the new word, too.
  */

 // C API: forwards to Hunspell::add_with_affix.
 int Hunspell_add_with_affix(Hunhandle* pHunspell,
                             const char* word,
                             const char* example) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->add_with_affix(word, example);
 }

 /* remove word from the run-time dictionary */

 // C API: forwards to Hunspell::remove.
 int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
   Hunspell* const self = reinterpret_cast<Hunspell*>(pHunspell);
   return self->remove(word);
 }

 // C API: frees the first `n` strings of *list and the array itself, then
 // sets *list to NULL. Does nothing when `list` or *list is NULL. The handle
 // argument is unused.
 void Hunspell_free_list(Hunhandle*, char*** list, int n) {
   if (!list || !*list)
     return;
   char** entries = *list;
   for (int i = 0; i < n; ++i)
     free(entries[i]);
   free(entries);
   *list = NULL;
 }

 // Public facade: forwards to HunspellImpl::suffix_suggest.
 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
   std::vector<std::string> words = m_Impl->suffix_suggest(root_word);
   return words;
 }

 // Returns the words the affix manager derives from `root_word` through
 // get_suffix_words().
 //
 // The root is looked up in the loaded dictionaries after the characters
 // listed by the affix manager's ignore setting (if any) have been removed.
 // Returns an empty vector when the lookup key is empty or not found.
 // Note that get_suffix_words() receives the original `root_word`, not the
 // stripped lookup key. Dereferences pAMgr without a null check.
 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
   std::vector<std::string> words;

   // lookup key: the root word, minus ignored characters when configured
   std::string stripped;
   const char* key = root_word.c_str();
   const char* ignored = pAMgr->get_ignore();
   if (ignored != NULL) {
     stripped.assign(root_word);
     if (utf8) {
       const std::vector<w_char>& ignored_utf16 = pAMgr->get_ignore_utf16();
       remove_ignored_chars_utf(stripped, ignored_utf16);
     } else {
       remove_ignored_chars(stripped, ignored);
     }
     key = stripped.c_str();
   }

   const int len = strlen(key);
   if (len == 0)
     return words;

   // the first dictionary that knows the key wins
   struct hentry* entry = NULL;
   for (size_t i = 0; i < m_HMgrs.size(); ++i) {
     entry = m_HMgrs[i]->lookup(key);
     if (entry)
       break;
   }
   if (!entry)
     return words;

   words = pAMgr->get_suffix_words(entry->astr, entry->alen, root_word.c_str());
   return words;
 }