boost_1_45_0/libs/regex/src/icu.cpp - nest-learning-thermostat/5.1.3/boost - Git at Google

 /*
  *
  * Copyright (c) 2004
  * John Maddock
  *
  * Use, modification and distribution are subject to the
  * Boost Software License, Version 1.0. (See accompanying file
  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  *
  */

  /*
   *   LOCATION:    see http://www.boost.org for most recent version.
   *   FILE         icu.cpp
   *   VERSION      see <boost/version.hpp>
   *   DESCRIPTION: Unicode regular expressions on top of the ICU Library.
   */
 #define BOOST_REGEX_SOURCE

 #include <boost/regex/config.hpp>
 #ifdef BOOST_HAS_ICU
 #define BOOST_REGEX_ICU_INSTANTIATE
 #include <boost/regex/icu.hpp>

 #ifdef BOOST_INTEL
 #pragma warning(disable:981 2259 383)
 #endif

 namespace boost{

 namespace re_detail{

 icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
 {
    // TODO make thread safe!!!! :
    typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
    itt i(p1), j(p2);
 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
    std::vector< ::UChar> t(i, j);
 #else
    std::vector< ::UChar> t;
    while(i != j)
       t.push_back(*i++);
 #endif
    ::uint8_t result[100];
    ::int32_t len;
    if(t.size())
       len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
    else
       len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
    if(std::size_t(len) > sizeof(result))
    {
       scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]);
       if(t.size())
          len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1);
       else
          len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len+1);
       if((0 == presult[len-1]) && (len > 1))
          --len;
 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
       return string_type(presult.get(), presult.get()+len);
 #else
       string_type sresult;
       ::uint8_t const* ia = presult.get();
       ::uint8_t const* ib = presult.get()+len;
       while(ia != ib)
          sresult.push_back(*ia++);
       return sresult;
 #endif
    }
    if((0 == result[len-1]) && (len > 1))
       --len;
 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
    return string_type(result, result+len);
 #else
    string_type sresult;
    ::uint8_t const* ia = result;
    ::uint8_t const* ib = result+len;
    while(ia != ib)
       sresult.push_back(*ia++);
    return sresult;
 #endif
 }

 }

 icu_regex_traits::size_type icu_regex_traits::length(const char_type* p)
 {
    size_type result = 0;
    while(*p)
    {
       ++p;
       ++result;
    }
    return result;
 }

 //
 // define our bitmasks:
 //
 const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_horizontal = icu_regex_traits::char_class_type(1) << offset_horizontal;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_vertical = icu_regex_traits::char_class_type(1) << offset_vertical;

 icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
 {
    static const ::UChar32 prop_name_table[] = {
       /* any */  'a', 'n', 'y',
       /* ascii */  'a', 's', 'c', 'i', 'i',
       /* assigned */  'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
       /* c* */  'c', '*',
       /* cc */  'c', 'c',
       /* cf */  'c', 'f',
       /* closepunctuation */  'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* cn */  'c', 'n',
       /* co */  'c', 'o',
       /* connectorpunctuation */  'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* control */  'c', 'o', 'n', 't', 'r', 'o', 'l',
       /* cs */  'c', 's',
       /* currencysymbol */  'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l',
       /* dashpunctuation */  'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* decimaldigitnumber */  'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r',
       /* enclosingmark */  'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
       /* finalpunctuation */  'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* format */  'f', 'o', 'r', 'm', 'a', 't',
       /* initialpunctuation */  'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* l* */  'l', '*',
       /* letter */  'l', 'e', 't', 't', 'e', 'r',
       /* letternumber */  'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
       /* lineseparator */  'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
       /* ll */  'l', 'l',
       /* lm */  'l', 'm',
       /* lo */  'l', 'o',
       /* lowercaseletter */  'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
       /* lt */  'l', 't',
       /* lu */  'l', 'u',
       /* m* */  'm', '*',
       /* mark */  'm', 'a', 'r', 'k',
       /* mathsymbol */  'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l',
       /* mc */  'm', 'c',
       /* me */  'm', 'e',
       /* mn */  'm', 'n',
       /* modifierletter */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
       /* modifiersymbol */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
       /* n* */  'n', '*',
       /* nd */  'n', 'd',
       /* nl */  'n', 'l',
       /* no */  'n', 'o',
       /* nonspacingmark */  'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
       /* notassigned */  'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
       /* number */  'n', 'u', 'm', 'b', 'e', 'r',
       /* openpunctuation */  'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* other */  'o', 't', 'h', 'e', 'r',
       /* otherletter */  'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
       /* othernumber */  'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
       /* otherpunctuation */  'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* othersymbol */  'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
       /* p* */  'p', '*',
       /* paragraphseparator */  'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
       /* pc */  'p', 'c',
       /* pd */  'p', 'd',
       /* pe */  'p', 'e',
       /* pf */  'p', 'f',
       /* pi */  'p', 'i',
       /* po */  'p', 'o',
       /* privateuse */  'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e',
       /* ps */  'p', 's',
       /* punctuation */  'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
       /* s* */  's', '*',
       /* sc */  's', 'c',
       /* separator */  's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
       /* sk */  's', 'k',
       /* sm */  's', 'm',
       /* so */  's', 'o',
       /* spaceseparator */  's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
       /* spacingcombiningmark */  's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
       /* surrogate */  's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e',
       /* symbol */  's', 'y', 'm', 'b', 'o', 'l',
       /* titlecase */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e',
       /* titlecaseletter */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
       /* uppercaseletter */  'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
       /* z* */  'z', '*',
       /* zl */  'z', 'l',
       /* zp */  'z', 'p',
       /* zs */  'z', 's',
    };

    static const re_detail::character_pointer_range< ::UChar32> range_data[] = {
       { prop_name_table+0, prop_name_table+3, }, // any
       { prop_name_table+3, prop_name_table+8, }, // ascii
       { prop_name_table+8, prop_name_table+16, }, // assigned
       { prop_name_table+16, prop_name_table+18, }, // c*
       { prop_name_table+18, prop_name_table+20, }, // cc
       { prop_name_table+20, prop_name_table+22, }, // cf
       { prop_name_table+22, prop_name_table+38, }, // closepunctuation
       { prop_name_table+38, prop_name_table+40, }, // cn
       { prop_name_table+40, prop_name_table+42, }, // co
       { prop_name_table+42, prop_name_table+62, }, // connectorpunctuation
       { prop_name_table+62, prop_name_table+69, }, // control
       { prop_name_table+69, prop_name_table+71, }, // cs
       { prop_name_table+71, prop_name_table+85, }, // currencysymbol
       { prop_name_table+85, prop_name_table+100, }, // dashpunctuation
       { prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber
       { prop_name_table+118, prop_name_table+131, }, // enclosingmark
       { prop_name_table+131, prop_name_table+147, }, // finalpunctuation
       { prop_name_table+147, prop_name_table+153, }, // format
       { prop_name_table+153, prop_name_table+171, }, // initialpunctuation
       { prop_name_table+171, prop_name_table+173, }, // l*
       { prop_name_table+173, prop_name_table+179, }, // letter
       { prop_name_table+179, prop_name_table+191, }, // letternumber
       { prop_name_table+191, prop_name_table+204, }, // lineseparator
       { prop_name_table+204, prop_name_table+206, }, // ll
       { prop_name_table+206, prop_name_table+208, }, // lm
       { prop_name_table+208, prop_name_table+210, }, // lo
       { prop_name_table+210, prop_name_table+225, }, // lowercaseletter
       { prop_name_table+225, prop_name_table+227, }, // lt
       { prop_name_table+227, prop_name_table+229, }, // lu
       { prop_name_table+229, prop_name_table+231, }, // m*
       { prop_name_table+231, prop_name_table+235, }, // mark
       { prop_name_table+235, prop_name_table+245, }, // mathsymbol
       { prop_name_table+245, prop_name_table+247, }, // mc
       { prop_name_table+247, prop_name_table+249, }, // me
       { prop_name_table+249, prop_name_table+251, }, // mn
       { prop_name_table+251, prop_name_table+265, }, // modifierletter
       { prop_name_table+265, prop_name_table+279, }, // modifiersymbol
       { prop_name_table+279, prop_name_table+281, }, // n*
       { prop_name_table+281, prop_name_table+283, }, // nd
       { prop_name_table+283, prop_name_table+285, }, // nl
       { prop_name_table+285, prop_name_table+287, }, // no
       { prop_name_table+287, prop_name_table+301, }, // nonspacingmark
       { prop_name_table+301, prop_name_table+312, }, // notassigned
       { prop_name_table+312, prop_name_table+318, }, // number
       { prop_name_table+318, prop_name_table+333, }, // openpunctuation
       { prop_name_table+333, prop_name_table+338, }, // other
       { prop_name_table+338, prop_name_table+349, }, // otherletter
       { prop_name_table+349, prop_name_table+360, }, // othernumber
       { prop_name_table+360, prop_name_table+376, }, // otherpunctuation
       { prop_name_table+376, prop_name_table+387, }, // othersymbol
       { prop_name_table+387, prop_name_table+389, }, // p*
       { prop_name_table+389, prop_name_table+407, }, // paragraphseparator
       { prop_name_table+407, prop_name_table+409, }, // pc
       { prop_name_table+409, prop_name_table+411, }, // pd
       { prop_name_table+411, prop_name_table+413, }, // pe
       { prop_name_table+413, prop_name_table+415, }, // pf
       { prop_name_table+415, prop_name_table+417, }, // pi
       { prop_name_table+417, prop_name_table+419, }, // po
       { prop_name_table+419, prop_name_table+429, }, // privateuse
       { prop_name_table+429, prop_name_table+431, }, // ps
       { prop_name_table+431, prop_name_table+442, }, // punctuation
       { prop_name_table+442, prop_name_table+444, }, // s*
       { prop_name_table+444, prop_name_table+446, }, // sc
       { prop_name_table+446, prop_name_table+455, }, // separator
       { prop_name_table+455, prop_name_table+457, }, // sk
       { prop_name_table+457, prop_name_table+459, }, // sm
       { prop_name_table+459, prop_name_table+461, }, // so
       { prop_name_table+461, prop_name_table+475, }, // spaceseparator
       { prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark
       { prop_name_table+495, prop_name_table+504, }, // surrogate
       { prop_name_table+504, prop_name_table+510, }, // symbol
       { prop_name_table+510, prop_name_table+519, }, // titlecase
       { prop_name_table+519, prop_name_table+534, }, // titlecaseletter
       { prop_name_table+534, prop_name_table+549, }, // uppercaseletter
       { prop_name_table+549, prop_name_table+551, }, // z*
       { prop_name_table+551, prop_name_table+553, }, // zl
       { prop_name_table+553, prop_name_table+555, }, // zp
       { prop_name_table+555, prop_name_table+557, }, // zs
    };

    static const icu_regex_traits::char_class_type icu_class_map[] = {
       icu_regex_traits::mask_any, // any
       icu_regex_traits::mask_ascii, // ascii
       (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
       U_GC_C_MASK, // c*
       U_GC_CC_MASK, // cc
       U_GC_CF_MASK, // cf
       U_GC_PE_MASK, // closepunctuation
       U_GC_CN_MASK, // cn
       U_GC_CO_MASK, // co
       U_GC_PC_MASK, // connectorpunctuation
       U_GC_CC_MASK, // control
       U_GC_CS_MASK, // cs
       U_GC_SC_MASK, // currencysymbol
       U_GC_PD_MASK, // dashpunctuation
       U_GC_ND_MASK, // decimaldigitnumber
       U_GC_ME_MASK, // enclosingmark
       U_GC_PF_MASK, // finalpunctuation
       U_GC_CF_MASK, // format
       U_GC_PI_MASK, // initialpunctuation
       U_GC_L_MASK, // l*
       U_GC_L_MASK, // letter
       U_GC_NL_MASK, // letternumber
       U_GC_ZL_MASK, // lineseparator
       U_GC_LL_MASK, // ll
       U_GC_LM_MASK, // lm
       U_GC_LO_MASK, // lo
       U_GC_LL_MASK, // lowercaseletter
       U_GC_LT_MASK, // lt
       U_GC_LU_MASK, // lu
       U_GC_M_MASK, // m*
       U_GC_M_MASK, // mark
       U_GC_SM_MASK, // mathsymbol
       U_GC_MC_MASK, // mc
       U_GC_ME_MASK, // me
       U_GC_MN_MASK, // mn
       U_GC_LM_MASK, // modifierletter
       U_GC_SK_MASK, // modifiersymbol
       U_GC_N_MASK, // n*
       U_GC_ND_MASK, // nd
       U_GC_NL_MASK, // nl
       U_GC_NO_MASK, // no
       U_GC_MN_MASK, // nonspacingmark
       U_GC_CN_MASK, // notassigned
       U_GC_N_MASK, // number
       U_GC_PS_MASK, // openpunctuation
       U_GC_C_MASK, // other
       U_GC_LO_MASK, // otherletter
       U_GC_NO_MASK, // othernumber
       U_GC_PO_MASK, // otherpunctuation
       U_GC_SO_MASK, // othersymbol
       U_GC_P_MASK, // p*
       U_GC_ZP_MASK, // paragraphseparator
       U_GC_PC_MASK, // pc
       U_GC_PD_MASK, // pd
       U_GC_PE_MASK, // pe
       U_GC_PF_MASK, // pf
       U_GC_PI_MASK, // pi
       U_GC_PO_MASK, // po
       U_GC_CO_MASK, // privateuse
       U_GC_PS_MASK, // ps
       U_GC_P_MASK, // punctuation
       U_GC_S_MASK, // s*
       U_GC_SC_MASK, // sc
       U_GC_Z_MASK, // separator
       U_GC_SK_MASK, // sk
       U_GC_SM_MASK, // sm
       U_GC_SO_MASK, // so
       U_GC_ZS_MASK, // spaceseparator
       U_GC_MC_MASK, // spacingcombiningmark
       U_GC_CS_MASK, // surrogate
       U_GC_S_MASK, // symbol
       U_GC_LT_MASK, // titlecase
       U_GC_LT_MASK, // titlecaseletter
       U_GC_LU_MASK, // uppercaseletter
       U_GC_Z_MASK, // z*
       U_GC_ZL_MASK, // zl
       U_GC_ZP_MASK, // zp
       U_GC_ZS_MASK, // zs
    };


    static const re_detail::character_pointer_range< ::UChar32>* ranges_begin = range_data;
    static const re_detail::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0]));

    re_detail::character_pointer_range< ::UChar32> t = { p1, p2, };
    const re_detail::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
    if((p != ranges_end) && (t == *p))
       return icu_class_map[p - ranges_begin];
    return 0;
 }

 icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const
 {
    static const char_class_type masks[] =
    {
       0,
       U_GC_L_MASK | U_GC_ND_MASK,
       U_GC_L_MASK,
       mask_blank,
       U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK,
       U_GC_ND_MASK,
       U_GC_ND_MASK,
       (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK),
       mask_horizontal,
       U_GC_LL_MASK,
       U_GC_LL_MASK,
       ~(U_GC_C_MASK),
       U_GC_P_MASK,
       char_class_type(U_GC_Z_MASK) | mask_space,
       char_class_type(U_GC_Z_MASK) | mask_space,
       U_GC_LU_MASK,
       mask_unicode,
       U_GC_LU_MASK,
       mask_vertical,
       char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
       char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
       char_class_type(U_GC_ND_MASK) | mask_xdigit,
    };

    int idx = ::boost::re_detail::get_default_class_id(p1, p2);
    if(idx >= 0)
       return masks[idx+1];
    char_class_type result = lookup_icu_mask(p1, p2);
    if(result != 0)
       return result;

    if(idx < 0)
    {
       string_type s(p1, p2);
       string_type::size_type i = 0;
       while(i < s.size())
       {
          s[i] = static_cast<char>((::u_tolower)(s[i]));
          if(::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_'))
             s.erase(s.begin()+i, s.begin()+i+1);
          else
          {
             s[i] = static_cast<char>((::u_tolower)(s[i]));
             ++i;
          }
       }
       if(s.size())
          idx = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
       if(idx >= 0)
          return masks[idx+1];
       if(s.size())
          result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size());
       if(result != 0)
          return result;
    }
    BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0]));
    return masks[idx+1];
 }

 icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const
 {
    string_type result;
    if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
    {
 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
       std::string s(p1, p2);
 #else
       std::string s;
       const char_type* p3 = p1;
       while(p3 != p2)
          s.append(1, *p3++);
 #endif
       // Try Unicode name:
       UErrorCode err = U_ZERO_ERROR;
       UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
       if(U_SUCCESS(err))
       {
          result.push_back(c);
          return result;
       }
       // Try Unicode-extended name:
       err = U_ZERO_ERROR;
       c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
       if(U_SUCCESS(err))
       {
          result.push_back(c);
          return result;
       }
       // try POSIX name:
       s = ::boost::re_detail::lookup_default_collate_name(s);
 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
       result.assign(s.begin(), s.end());
 #else
       result.clear();
       std::string::const_iterator si, sj;
       si = s.begin();
       sj = s.end();
       while(si != sj)
          result.push_back(*si++);
 #endif
    }
    if(result.empty() && (p2-p1 == 1))
       result.push_back(*p1);
    return result;
 }

 bool icu_regex_traits::isctype(char_type c, char_class_type f) const
 {
    // check for standard catagories first:
    char_class_type m = char_class_type(1u << u_charType(c));
    if((m & f) != 0)
       return true;
    // now check for special cases:
    if(((f & mask_blank) != 0) && u_isblank(c))
       return true;
    if(((f & mask_space) != 0) && u_isspace(c))
       return true;
    if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
       return true;
    if(((f & mask_unicode) != 0) && (c >= 0x100))
       return true;
    if(((f & mask_underscore) != 0) && (c == '_'))
       return true;
    if(((f & mask_any) != 0) && (c <= 0x10FFFF))
       return true;
    if(((f & mask_ascii) != 0) && (c <= 0x7F))
       return true;
    if(((f & mask_vertical) != 0) && (::boost::re_detail::is_separator(c) || (c == static_cast<char_type>('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK)))
       return true;
    if(((f & mask_horizontal) != 0) && !::boost::re_detail::is_separator(c) && u_isspace(c) && (c != static_cast<char_type>('\v')))
       return true;
    return false;
 }

 }

 #endif // BOOST_HAS_ICU
	/*
	*
	* Copyright (c) 2004
	* John Maddock
	*
	* Use, modification and distribution are subject to the
	* Boost Software License, Version 1.0. (See accompanying file
	* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	*
	*/

	/*
	* LOCATION: see http://www.boost.org for most recent version.
	* FILE icu.cpp
	* VERSION see <boost/version.hpp>
	* DESCRIPTION: Unicode regular expressions on top of the ICU Library.
	*/
	#define BOOST_REGEX_SOURCE

	#include <boost/regex/config.hpp>
	#ifdef BOOST_HAS_ICU
	#define BOOST_REGEX_ICU_INSTANTIATE
	#include <boost/regex/icu.hpp>

	#ifdef BOOST_INTEL
	#pragma warning(disable:981 2259 383)
	#endif

	namespace boost{

	namespace re_detail{

	icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
	{
	// TODO make thread safe!!!! :
	typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
	itt i(p1), j(p2);
	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	std::vector< ::UChar> t(i, j);
	#else
	std::vector< ::UChar> t;
	while(i != j)
	t.push_back(*i++);
	#endif
	::uint8_t result[100];
	::int32_t len;
	if(t.size())
	len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
	else
	len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
	if(std::size_t(len) > sizeof(result))
	{
	scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]);
	if(t.size())
	len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1);
	else
	len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len+1);
	if((0 == presult[len-1]) && (len > 1))
	--len;
	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	return string_type(presult.get(), presult.get()+len);
	#else
	string_type sresult;
	::uint8_t const* ia = presult.get();
	::uint8_t const* ib = presult.get()+len;
	while(ia != ib)
	sresult.push_back(*ia++);
	return sresult;
	#endif
	}
	if((0 == result[len-1]) && (len > 1))
	--len;
	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	return string_type(result, result+len);
	#else
	string_type sresult;
	::uint8_t const* ia = result;
	::uint8_t const* ib = result+len;
	while(ia != ib)
	sresult.push_back(*ia++);
	return sresult;
	#endif
	}

	}

	icu_regex_traits::size_type icu_regex_traits::length(const char_type* p)
	{
	size_type result = 0;
	while(*p)
	{
	++p;
	++result;
	}
	return result;
	}

	//
	// define our bitmasks:
	//
	const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_horizontal = icu_regex_traits::char_class_type(1) << offset_horizontal;
	const icu_regex_traits::char_class_type icu_regex_traits::mask_vertical = icu_regex_traits::char_class_type(1) << offset_vertical;

	icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
	{
	static const ::UChar32 prop_name_table[] = {
	/* any */ 'a', 'n', 'y',
	/* ascii */ 'a', 's', 'c', 'i', 'i',
	/* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
	/* c* / 'c', '',
	/* cc */ 'c', 'c',
	/* cf */ 'c', 'f',
	/* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* cn */ 'c', 'n',
	/* co */ 'c', 'o',
	/* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l',
	/* cs */ 'c', 's',
	/* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l',
	/* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r',
	/* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	/* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* format */ 'f', 'o', 'r', 'm', 'a', 't',
	/* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* l* / 'l', '',
	/* letter */ 'l', 'e', 't', 't', 'e', 'r',
	/* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
	/* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	/* ll */ 'l', 'l',
	/* lm */ 'l', 'm',
	/* lo */ 'l', 'o',
	/* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	/* lt */ 'l', 't',
	/* lu */ 'l', 'u',
	/* m* / 'm', '',
	/* mark */ 'm', 'a', 'r', 'k',
	/* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l',
	/* mc */ 'm', 'c',
	/* me */ 'm', 'e',
	/* mn */ 'm', 'n',
	/* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
	/* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
	/* n* / 'n', '',
	/* nd */ 'n', 'd',
	/* nl */ 'n', 'l',
	/* no */ 'n', 'o',
	/* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	/* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
	/* number */ 'n', 'u', 'm', 'b', 'e', 'r',
	/* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* other */ 'o', 't', 'h', 'e', 'r',
	/* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
	/* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
	/* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
	/* p* / 'p', '',
	/* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	/* pc */ 'p', 'c',
	/* pd */ 'p', 'd',
	/* pe */ 'p', 'e',
	/* pf */ 'p', 'f',
	/* pi */ 'p', 'i',
	/* po */ 'p', 'o',
	/* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e',
	/* ps */ 'p', 's',
	/* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	/* s* / 's', '',
	/* sc */ 's', 'c',
	/* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	/* sk */ 's', 'k',
	/* sm */ 's', 'm',
	/* so */ 's', 'o',
	/* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	/* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	/* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e',
	/* symbol */ 's', 'y', 'm', 'b', 'o', 'l',
	/* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e',
	/* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	/* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	/* z* / 'z', '',
	/* zl */ 'z', 'l',
	/* zp */ 'z', 'p',
	/* zs */ 'z', 's',
	};

	static const re_detail::character_pointer_range< ::UChar32> range_data[] = {
	{ prop_name_table+0, prop_name_table+3, }, // any
	{ prop_name_table+3, prop_name_table+8, }, // ascii
	{ prop_name_table+8, prop_name_table+16, }, // assigned
	{ prop_name_table+16, prop_name_table+18, }, // c*
	{ prop_name_table+18, prop_name_table+20, }, // cc
	{ prop_name_table+20, prop_name_table+22, }, // cf
	{ prop_name_table+22, prop_name_table+38, }, // closepunctuation
	{ prop_name_table+38, prop_name_table+40, }, // cn
	{ prop_name_table+40, prop_name_table+42, }, // co
	{ prop_name_table+42, prop_name_table+62, }, // connectorpunctuation
	{ prop_name_table+62, prop_name_table+69, }, // control
	{ prop_name_table+69, prop_name_table+71, }, // cs
	{ prop_name_table+71, prop_name_table+85, }, // currencysymbol
	{ prop_name_table+85, prop_name_table+100, }, // dashpunctuation
	{ prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber
	{ prop_name_table+118, prop_name_table+131, }, // enclosingmark
	{ prop_name_table+131, prop_name_table+147, }, // finalpunctuation
	{ prop_name_table+147, prop_name_table+153, }, // format
	{ prop_name_table+153, prop_name_table+171, }, // initialpunctuation
	{ prop_name_table+171, prop_name_table+173, }, // l*
	{ prop_name_table+173, prop_name_table+179, }, // letter
	{ prop_name_table+179, prop_name_table+191, }, // letternumber
	{ prop_name_table+191, prop_name_table+204, }, // lineseparator
	{ prop_name_table+204, prop_name_table+206, }, // ll
	{ prop_name_table+206, prop_name_table+208, }, // lm
	{ prop_name_table+208, prop_name_table+210, }, // lo
	{ prop_name_table+210, prop_name_table+225, }, // lowercaseletter
	{ prop_name_table+225, prop_name_table+227, }, // lt
	{ prop_name_table+227, prop_name_table+229, }, // lu
	{ prop_name_table+229, prop_name_table+231, }, // m*
	{ prop_name_table+231, prop_name_table+235, }, // mark
	{ prop_name_table+235, prop_name_table+245, }, // mathsymbol
	{ prop_name_table+245, prop_name_table+247, }, // mc
	{ prop_name_table+247, prop_name_table+249, }, // me
	{ prop_name_table+249, prop_name_table+251, }, // mn
	{ prop_name_table+251, prop_name_table+265, }, // modifierletter
	{ prop_name_table+265, prop_name_table+279, }, // modifiersymbol
	{ prop_name_table+279, prop_name_table+281, }, // n*
	{ prop_name_table+281, prop_name_table+283, }, // nd
	{ prop_name_table+283, prop_name_table+285, }, // nl
	{ prop_name_table+285, prop_name_table+287, }, // no
	{ prop_name_table+287, prop_name_table+301, }, // nonspacingmark
	{ prop_name_table+301, prop_name_table+312, }, // notassigned
	{ prop_name_table+312, prop_name_table+318, }, // number
	{ prop_name_table+318, prop_name_table+333, }, // openpunctuation
	{ prop_name_table+333, prop_name_table+338, }, // other
	{ prop_name_table+338, prop_name_table+349, }, // otherletter
	{ prop_name_table+349, prop_name_table+360, }, // othernumber
	{ prop_name_table+360, prop_name_table+376, }, // otherpunctuation
	{ prop_name_table+376, prop_name_table+387, }, // othersymbol
	{ prop_name_table+387, prop_name_table+389, }, // p*
	{ prop_name_table+389, prop_name_table+407, }, // paragraphseparator
	{ prop_name_table+407, prop_name_table+409, }, // pc
	{ prop_name_table+409, prop_name_table+411, }, // pd
	{ prop_name_table+411, prop_name_table+413, }, // pe
	{ prop_name_table+413, prop_name_table+415, }, // pf
	{ prop_name_table+415, prop_name_table+417, }, // pi
	{ prop_name_table+417, prop_name_table+419, }, // po
	{ prop_name_table+419, prop_name_table+429, }, // privateuse
	{ prop_name_table+429, prop_name_table+431, }, // ps
	{ prop_name_table+431, prop_name_table+442, }, // punctuation
	{ prop_name_table+442, prop_name_table+444, }, // s*
	{ prop_name_table+444, prop_name_table+446, }, // sc
	{ prop_name_table+446, prop_name_table+455, }, // separator
	{ prop_name_table+455, prop_name_table+457, }, // sk
	{ prop_name_table+457, prop_name_table+459, }, // sm
	{ prop_name_table+459, prop_name_table+461, }, // so
	{ prop_name_table+461, prop_name_table+475, }, // spaceseparator
	{ prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark
	{ prop_name_table+495, prop_name_table+504, }, // surrogate
	{ prop_name_table+504, prop_name_table+510, }, // symbol
	{ prop_name_table+510, prop_name_table+519, }, // titlecase
	{ prop_name_table+519, prop_name_table+534, }, // titlecaseletter
	{ prop_name_table+534, prop_name_table+549, }, // uppercaseletter
	{ prop_name_table+549, prop_name_table+551, }, // z*
	{ prop_name_table+551, prop_name_table+553, }, // zl
	{ prop_name_table+553, prop_name_table+555, }, // zp
	{ prop_name_table+555, prop_name_table+557, }, // zs
	};

	static const icu_regex_traits::char_class_type icu_class_map[] = {
	icu_regex_traits::mask_any, // any
	icu_regex_traits::mask_ascii, // ascii
	(0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
	U_GC_C_MASK, // c*
	U_GC_CC_MASK, // cc
	U_GC_CF_MASK, // cf
	U_GC_PE_MASK, // closepunctuation
	U_GC_CN_MASK, // cn
	U_GC_CO_MASK, // co
	U_GC_PC_MASK, // connectorpunctuation
	U_GC_CC_MASK, // control
	U_GC_CS_MASK, // cs
	U_GC_SC_MASK, // currencysymbol
	U_GC_PD_MASK, // dashpunctuation
	U_GC_ND_MASK, // decimaldigitnumber
	U_GC_ME_MASK, // enclosingmark
	U_GC_PF_MASK, // finalpunctuation
	U_GC_CF_MASK, // format
	U_GC_PI_MASK, // initialpunctuation
	U_GC_L_MASK, // l*
	U_GC_L_MASK, // letter
	U_GC_NL_MASK, // letternumber
	U_GC_ZL_MASK, // lineseparator
	U_GC_LL_MASK, // ll
	U_GC_LM_MASK, // lm
	U_GC_LO_MASK, // lo
	U_GC_LL_MASK, // lowercaseletter
	U_GC_LT_MASK, // lt
	U_GC_LU_MASK, // lu
	U_GC_M_MASK, // m*
	U_GC_M_MASK, // mark
	U_GC_SM_MASK, // mathsymbol
	U_GC_MC_MASK, // mc
	U_GC_ME_MASK, // me
	U_GC_MN_MASK, // mn
	U_GC_LM_MASK, // modifierletter
	U_GC_SK_MASK, // modifiersymbol
	U_GC_N_MASK, // n*
	U_GC_ND_MASK, // nd
	U_GC_NL_MASK, // nl
	U_GC_NO_MASK, // no
	U_GC_MN_MASK, // nonspacingmark
	U_GC_CN_MASK, // notassigned
	U_GC_N_MASK, // number
	U_GC_PS_MASK, // openpunctuation
	U_GC_C_MASK, // other
	U_GC_LO_MASK, // otherletter
	U_GC_NO_MASK, // othernumber
	U_GC_PO_MASK, // otherpunctuation
	U_GC_SO_MASK, // othersymbol
	U_GC_P_MASK, // p*
	U_GC_ZP_MASK, // paragraphseparator
	U_GC_PC_MASK, // pc
	U_GC_PD_MASK, // pd
	U_GC_PE_MASK, // pe
	U_GC_PF_MASK, // pf
	U_GC_PI_MASK, // pi
	U_GC_PO_MASK, // po
	U_GC_CO_MASK, // privateuse
	U_GC_PS_MASK, // ps
	U_GC_P_MASK, // punctuation
	U_GC_S_MASK, // s*
	U_GC_SC_MASK, // sc
	U_GC_Z_MASK, // separator
	U_GC_SK_MASK, // sk
	U_GC_SM_MASK, // sm
	U_GC_SO_MASK, // so
	U_GC_ZS_MASK, // spaceseparator
	U_GC_MC_MASK, // spacingcombiningmark
	U_GC_CS_MASK, // surrogate
	U_GC_S_MASK, // symbol
	U_GC_LT_MASK, // titlecase
	U_GC_LT_MASK, // titlecaseletter
	U_GC_LU_MASK, // uppercaseletter
	U_GC_Z_MASK, // z*
	U_GC_ZL_MASK, // zl
	U_GC_ZP_MASK, // zp
	U_GC_ZS_MASK, // zs
	};


	static const re_detail::character_pointer_range< ::UChar32>* ranges_begin = range_data;
	static const re_detail::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0]));

	re_detail::character_pointer_range< ::UChar32> t = { p1, p2, };
	const re_detail::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
	if((p != ranges_end) && (t == *p))
	return icu_class_map[p - ranges_begin];
	return 0;
	}

	icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const
	{
	static const char_class_type masks[] =
	{
	0,
	U_GC_L_MASK \| U_GC_ND_MASK,
	U_GC_L_MASK,
	mask_blank,
	U_GC_CC_MASK \| U_GC_CF_MASK \| U_GC_ZL_MASK \| U_GC_ZP_MASK,
	U_GC_ND_MASK,
	U_GC_ND_MASK,
	(0x3FFFFFFFu) & ~(U_GC_CC_MASK \| U_GC_CF_MASK \| U_GC_CS_MASK \| U_GC_CN_MASK \| U_GC_Z_MASK),
	mask_horizontal,
	U_GC_LL_MASK,
	U_GC_LL_MASK,
	~(U_GC_C_MASK),
	U_GC_P_MASK,
	char_class_type(U_GC_Z_MASK) \| mask_space,
	char_class_type(U_GC_Z_MASK) \| mask_space,
	U_GC_LU_MASK,
	mask_unicode,
	U_GC_LU_MASK,
	mask_vertical,
	char_class_type(U_GC_L_MASK \| U_GC_ND_MASK \| U_GC_MN_MASK) \| mask_underscore,
	char_class_type(U_GC_L_MASK \| U_GC_ND_MASK \| U_GC_MN_MASK) \| mask_underscore,
	char_class_type(U_GC_ND_MASK) \| mask_xdigit,
	};

	int idx = ::boost::re_detail::get_default_class_id(p1, p2);
	if(idx >= 0)
	return masks[idx+1];
	char_class_type result = lookup_icu_mask(p1, p2);
	if(result != 0)
	return result;

	if(idx < 0)
	{
	string_type s(p1, p2);
	string_type::size_type i = 0;
	while(i < s.size())
	{
	s[i] = static_cast<char>((::u_tolower)(s[i]));
	if(::u_isspace(s[i]) \|\| (s[i] == '-') \|\| (s[i] == '_'))
	s.erase(s.begin()+i, s.begin()+i+1);
	else
	{
	s[i] = static_cast<char>((::u_tolower)(s[i]));
	++i;
	}
	}
	if(s.size())
	idx = ::boost::re_detail::get_default_class_id(&s.begin(), &s.begin() + s.size());
	if(idx >= 0)
	return masks[idx+1];
	if(s.size())
	result = lookup_icu_mask(&s.begin(), &s.begin() + s.size());
	if(result != 0)
	return result;
	}
	BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0]));
	return masks[idx+1];
	}

	icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const
	{
	string_type result;
	if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
	{
	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	std::string s(p1, p2);
	#else
	std::string s;
	const char_type* p3 = p1;
	while(p3 != p2)
	s.append(1, *p3++);
	#endif
	// Try Unicode name:
	UErrorCode err = U_ZERO_ERROR;
	UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
	if(U_SUCCESS(err))
	{
	result.push_back(c);
	return result;
	}
	// Try Unicode-extended name:
	err = U_ZERO_ERROR;
	c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
	if(U_SUCCESS(err))
	{
	result.push_back(c);
	return result;
	}
	// try POSIX name:
	s = ::boost::re_detail::lookup_default_collate_name(s);
	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	result.assign(s.begin(), s.end());
	#else
	result.clear();
	std::string::const_iterator si, sj;
	si = s.begin();
	sj = s.end();
	while(si != sj)
	result.push_back(*si++);
	#endif
	}
	if(result.empty() && (p2-p1 == 1))
	result.push_back(*p1);
	return result;
	}

	bool icu_regex_traits::isctype(char_type c, char_class_type f) const
	{
	// check for standard catagories first:
	char_class_type m = char_class_type(1u << u_charType(c));
	if((m & f) != 0)
	return true;
	// now check for special cases:
	if(((f & mask_blank) != 0) && u_isblank(c))
	return true;
	if(((f & mask_space) != 0) && u_isspace(c))
	return true;
	if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
	return true;
	if(((f & mask_unicode) != 0) && (c >= 0x100))
	return true;
	if(((f & mask_underscore) != 0) && (c == '_'))
	return true;
	if(((f & mask_any) != 0) && (c <= 0x10FFFF))
	return true;
	if(((f & mask_ascii) != 0) && (c <= 0x7F))
	return true;
	if(((f & mask_vertical) != 0) && (::boost::re_detail::is_separator(c) \|\| (c == static_cast<char_type>('\v')) \|\| (m == U_GC_ZL_MASK) \|\| (m == U_GC_ZP_MASK)))
	return true;
	if(((f & mask_horizontal) != 0) && !::boost::re_detail::is_separator(c) && u_isspace(c) && (c != static_cast<char_type>('\v')))
	return true;
	return false;
	}

	}

	#endif // BOOST_HAS_ICU