| ////////////////////////////////////////////////////////////////////////////// |
| /// \file c_regex_traits.hpp |
| /// Contains the definition of the c_regex_traits\<\> template, which is a |
| /// wrapper for the C locale functions that can be used to customize the |
| /// behavior of static and dynamic regexes. |
| // |
| // Copyright 2008 Eric Niebler. Distributed under the Boost |
| // Software License, Version 1.0. (See accompanying file |
| // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 |
| #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 |
| |
| // MS compatible compilers support #pragma once |
| #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
| # pragma once |
| #endif |
| |
| #include <cstdlib> |
| #include <boost/config.hpp> |
| #include <boost/assert.hpp> |
| #include <boost/xpressive/traits/detail/c_ctype.hpp> |
| |
| namespace boost { namespace xpressive |
| { |
| |
| namespace detail |
| { |
| /////////////////////////////////////////////////////////////////////////////// |
| // empty_locale |
/// Data-less placeholder type; c_regex_traits<> exposes it as its locale_type.
struct empty_locale {};
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // c_regex_traits_base |
/// Primary template of the base class that c_regex_traits<Char> derives from.
/// It is used for every Char that does not match one of the specializations
/// of this template.
///
/// \tparam Char       The character type of the traits class.
/// \tparam SizeOfChar Defaults to sizeof(Char); lets specializations select on
///                    the size of the character type.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct c_regex_traits_base
{
protected:
    /// Imbue hook; does nothing in this variant.
    ///
    /// Declared static and with an unnamed parameter so that it has the same
    /// shape as the imbue() members of the specializations and does not
    /// trigger unused-parameter warnings. Calls of the form
    /// this->imbue(x) and base_type::imbue(x) keep working.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};
| |
| template<typename Char> |
| struct c_regex_traits_base<Char, 1> |
| { |
| protected: |
| template<typename Traits> |
| static void imbue(Traits const &) |
| { |
| } |
| }; |
| |
| #ifndef BOOST_XPRESSIVE_NO_WREGEX |
| template<std::size_t SizeOfChar> |
| struct c_regex_traits_base<wchar_t, SizeOfChar> |
| { |
| protected: |
| template<typename Traits> |
| static void imbue(Traits const &) |
| { |
| } |
| }; |
| #endif |
| |
/// Case-conversion helpers. Only declared here; each supported character
/// type supplies an explicit specialization.
template<typename Char>
Char c_tolower(Char);

template<typename Char>
Char c_toupper(Char);

/// Lower-cases a narrow character with the C library's tolower().
template<>
inline char c_tolower(char ch)
{
    // The using-directive lets the unqualified call resolve whether the
    // C library declares tolower in namespace std or in the global namespace.
    using namespace std;
    // tolower() expects an unsigned char value (or EOF), so convert first.
    unsigned char const as_byte = static_cast<unsigned char>(ch);
    int const lowered = tolower(as_byte);
    return static_cast<char>(lowered);
}

/// Upper-cases a narrow character with the C library's toupper().
template<>
inline char c_toupper(char ch)
{
    using namespace std;
    // toupper() expects an unsigned char value (or EOF), so convert first.
    unsigned char const as_byte = static_cast<unsigned char>(ch);
    int const raised = toupper(as_byte);
    return static_cast<char>(raised);
}
| |
| #ifndef BOOST_XPRESSIVE_NO_WREGEX |
| template<> |
| inline wchar_t c_tolower(wchar_t ch) |
| { |
| using namespace std; |
| return towlower(ch); |
| } |
| |
| template<> |
| inline wchar_t c_toupper(wchar_t ch) |
| { |
| using namespace std; |
| return towupper(ch); |
| } |
| #endif |
| |
| } // namespace detail |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // regex_traits_version_1_tag |
| // |
| struct regex_traits_version_1_tag; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // c_regex_traits |
| // |
| /// \brief Encapsaulates the standard C locale functions for use by the |
| /// basic_regex\<\> class template. |
| template<typename Char> |
| struct c_regex_traits |
| : detail::c_regex_traits_base<Char> |
| { |
| typedef Char char_type; |
| typedef std::basic_string<char_type> string_type; |
| typedef detail::empty_locale locale_type; |
| typedef typename detail::char_class_impl<Char>::char_class_type char_class_type; |
| typedef regex_traits_version_2_tag version_tag; |
| typedef detail::c_regex_traits_base<Char> base_type; |
| |
| /// Initialize a c_regex_traits object to use the global C locale. |
| /// |
| c_regex_traits(locale_type const &loc = locale_type()) |
| : base_type() |
| { |
| this->imbue(loc); |
| } |
| |
| /// Checks two c_regex_traits objects for equality |
| /// |
| /// \return true. |
| bool operator ==(c_regex_traits<char_type> const &) const |
| { |
| return true; |
| } |
| |
| /// Checks two c_regex_traits objects for inequality |
| /// |
| /// \return false. |
| bool operator !=(c_regex_traits<char_type> const &) const |
| { |
| return false; |
| } |
| |
| /// Convert a char to a Char |
| /// |
| /// \param ch The source character. |
| /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t. |
| static char_type widen(char ch); |
| |
| /// Returns a hash value for a Char in the range [0, UCHAR_MAX] |
| /// |
| /// \param ch The source character. |
| /// \return a value between 0 and UCHAR_MAX, inclusive. |
| static unsigned char hash(char_type ch) |
| { |
| return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch)); |
| } |
| |
| /// No-op |
| /// |
| /// \param ch The source character. |
| /// \return ch |
| static char_type translate(char_type ch) |
| { |
| return ch; |
| } |
| |
| /// Converts a character to lower-case using the current global C locale. |
| /// |
| /// \param ch The source character. |
| /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t. |
| static char_type translate_nocase(char_type ch) |
| { |
| return detail::c_tolower(ch); |
| } |
| |
| /// Converts a character to lower-case using the current global C locale. |
| /// |
| /// \param ch The source character. |
| /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t. |
| static char_type tolower(char_type ch) |
| { |
| return detail::c_tolower(ch); |
| } |
| |
| /// Converts a character to upper-case using the current global C locale. |
| /// |
| /// \param ch The source character. |
| /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t. |
| static char_type toupper(char_type ch) |
| { |
| return detail::c_toupper(ch); |
| } |
| |
| /// Returns a string_type containing all the characters that compare equal |
| /// disregrarding case to the one passed in. This function can only be called |
| /// if has_fold_case<c_regex_traits<Char> >::value is true. |
| /// |
| /// \param ch The source character. |
| /// \return string_type containing all chars which are equal to ch when disregarding |
| /// case |
| //typedef array<char_type, 2> fold_case_type; |
| string_type fold_case(char_type ch) const |
| { |
| BOOST_MPL_ASSERT((is_same<char_type, char>)); |
| char_type ntcs[] = { |
| detail::c_tolower(ch) |
| , detail::c_toupper(ch) |
| , 0 |
| }; |
| if(ntcs[1] == ntcs[0]) |
| ntcs[1] = 0; |
| return string_type(ntcs); |
| } |
| |
| /// Checks to see if a character is within a character range. |
| /// |
| /// \param first The bottom of the range, inclusive. |
| /// \param last The top of the range, inclusive. |
| /// \param ch The source character. |
| /// \return first <= ch && ch <= last. |
| static bool in_range(char_type first, char_type last, char_type ch) |
| { |
| return first <= ch && ch <= last; |
| } |
| |
| /// Checks to see if a character is within a character range, irregardless of case. |
| /// |
| /// \param first The bottom of the range, inclusive. |
| /// \param last The top of the range, inclusive. |
| /// \param ch The source character. |
| /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first, |
| /// last, toupper(ch)) |
| /// \attention The default implementation doesn't do proper Unicode |
| /// case folding, but this is the best we can do with the standard |
| /// C locale functions. |
| static bool in_range_nocase(char_type first, char_type last, char_type ch) |
| { |
| return c_regex_traits::in_range(first, last, ch) |
| || c_regex_traits::in_range(first, last, detail::c_tolower(ch)) |
| || c_regex_traits::in_range(first, last, detail::c_toupper(ch)); |
| } |
| |
| /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
| /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
| /// then v.transform(G1, G2) < v.transform(H1, H2). |
| /// |
| /// \attention Not currently used |
| template<typename FwdIter> |
| static string_type transform(FwdIter begin, FwdIter end) |
| { |
| BOOST_ASSERT(false); // BUGBUG implement me |
| } |
| |
| /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
| /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
| /// when character case is not considered then |
| /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2). |
| /// |
| /// \attention Not currently used |
| template<typename FwdIter> |
| static string_type transform_primary(FwdIter begin, FwdIter end) |
| { |
| BOOST_ASSERT(false); // BUGBUG implement me |
| } |
| |
| /// Returns a sequence of characters that represents the collating element |
| /// consisting of the character sequence designated by the iterator range [F1, F2). |
| /// Returns an empty string if the character sequence is not a valid collating element. |
| /// |
| /// \attention Not currently used |
| template<typename FwdIter> |
| static string_type lookup_collatename(FwdIter begin, FwdIter end) |
| { |
| BOOST_ASSERT(false); // BUGBUG implement me |
| } |
| |
| /// For the character class name represented by the specified character sequence, |
| /// return the corresponding bitmask representation. |
| /// |
| /// \param begin A forward iterator to the start of the character sequence representing |
| /// the name of the character class. |
| /// \param end The end of the character sequence. |
| /// \param icase Specifies whether the returned bitmask should represent the case-insensitive |
| /// version of the character class. |
| /// \return A bitmask representing the character class. |
| template<typename FwdIter> |
| static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) |
| { |
| return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase); |
| } |
| |
| /// Tests a character against a character class bitmask. |
| /// |
| /// \param ch The character to test. |
| /// \param mask The character class bitmask against which to test. |
| /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed |
| /// together. |
| /// \return true if the character is a member of any of the specified character classes, false |
| /// otherwise. |
| static bool isctype(char_type ch, char_class_type mask) |
| { |
| return detail::char_class_impl<char_type>::isctype(ch, mask); |
| } |
| |
| /// Convert a digit character into the integer it represents. |
| /// |
| /// \param ch The digit character. |
| /// \param radix The radix to use for the conversion. |
| /// \pre radix is one of 8, 10, or 16. |
| /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If |
| /// char_type is char, std::strtol is used for the conversion. If char_type is wchar_t, |
| /// std::wcstol is used. |
| static int value(char_type ch, int radix); |
| |
| /// No-op |
| /// |
| locale_type imbue(locale_type loc) |
| { |
| this->base_type::imbue(*this); |
| return loc; |
| } |
| |
| /// No-op |
| /// |
| static locale_type getloc() |
| { |
| locale_type loc; |
| return loc; |
| } |
| }; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // c_regex_traits<>::widen specializations |
| /// INTERNAL ONLY |
| template<> |
| inline char c_regex_traits<char>::widen(char ch) |
| { |
| return ch; |
| } |
| |
| #ifndef BOOST_XPRESSIVE_NO_WREGEX |
| /// INTERNAL ONLY |
| template<> |
| inline wchar_t c_regex_traits<wchar_t>::widen(char ch) |
| { |
| using namespace std; |
| return btowc(ch); |
| } |
| #endif |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // c_regex_traits<>::hash specializations |
| /// INTERNAL ONLY |
| template<> |
| inline unsigned char c_regex_traits<char>::hash(char ch) |
| { |
| return static_cast<unsigned char>(ch); |
| } |
| |
| #ifndef BOOST_XPRESSIVE_NO_WREGEX |
| /// INTERNAL ONLY |
| template<> |
| inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch) |
| { |
| return static_cast<unsigned char>(ch); |
| } |
| #endif |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // c_regex_traits<>::value specializations |
| /// INTERNAL ONLY |
| template<> |
| inline int c_regex_traits<char>::value(char ch, int radix) |
| { |
| using namespace std; |
| BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); |
| char begin[2] = { ch, '\0' }, *end = 0; |
| int val = strtol(begin, &end, radix); |
| return begin == end ? -1 : val; |
| } |
| |
| #ifndef BOOST_XPRESSIVE_NO_WREGEX |
| /// INTERNAL ONLY |
| template<> |
| inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix) |
| { |
| using namespace std; |
| BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); |
| wchar_t begin[2] = { ch, L'\0' }, *end = 0; |
| int val = wcstol(begin, &end, radix); |
| return begin == end ? -1 : val; |
| } |
| #endif |
| |
| // Narrow C traits has fold_case() member function. |
| template<> |
| struct has_fold_case<c_regex_traits<char> > |
| : mpl::true_ |
| { |
| }; |
| |
| }} |
| |
| #endif |