| /////////////////////////////////////////////////////////////////////////////// |
| // detail/dynamic/parser_traits.hpp |
| // |
| // Copyright 2008 Eric Niebler. Distributed under the Boost |
| // Software License, Version 1.0. (See accompanying file |
| // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005 |
| #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005 |
| |
| // MS compatible compilers support #pragma once |
| #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
| # pragma once |
| #endif |
| |
| #include <string> |
| #include <climits> |
| #include <boost/assert.hpp> |
| #include <boost/throw_exception.hpp> |
| #include <boost/xpressive/regex_error.hpp> |
| #include <boost/xpressive/regex_traits.hpp> |
| #include <boost/xpressive/detail/detail_fwd.hpp> |
| #include <boost/xpressive/detail/dynamic/matchable.hpp> |
| #include <boost/xpressive/detail/dynamic/parser_enum.hpp> |
| #include <boost/xpressive/detail/utility/literals.hpp> |
| #include <boost/xpressive/detail/utility/algorithm.hpp> |
| |
| namespace boost { namespace xpressive |
| { |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // compiler_traits |
| // this works for char and wchar_t. it must be specialized for anything else. |
| // |
| template<typename RegexTraits> |
| struct compiler_traits |
| { |
| typedef RegexTraits regex_traits; |
| typedef typename regex_traits::char_type char_type; |
| typedef typename regex_traits::string_type string_type; |
| typedef typename regex_traits::locale_type locale_type; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // constructor |
| explicit compiler_traits(RegexTraits const &traits = RegexTraits()) |
| : traits_(traits) |
| , flags_(regex_constants::ECMAScript) |
| , space_(lookup_classname(traits_, "space")) |
| , alnum_(lookup_classname(traits_, "alnum")) |
| { |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // flags |
| regex_constants::syntax_option_type flags() const |
| { |
| return this->flags_; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // flags |
| void flags(regex_constants::syntax_option_type flags) |
| { |
| this->flags_ = flags; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // traits |
| regex_traits &traits() |
| { |
| return this->traits_; |
| } |
| |
| regex_traits const &traits() const |
| { |
| return this->traits_; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // imbue |
| locale_type imbue(locale_type const &loc) |
| { |
| locale_type oldloc = this->traits().imbue(loc); |
| this->space_ = lookup_classname(this->traits(), "space"); |
| this->alnum_ = lookup_classname(this->traits(), "alnum"); |
| return oldloc; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // getloc |
| locale_type getloc() const |
| { |
| return this->traits().getloc(); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // get_token |
| // get a token and advance the iterator |
| template<typename FwdIter> |
| regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end) |
| { |
| using namespace regex_constants; |
| if(this->eat_ws_(begin, end) == end) |
| { |
| return regex_constants::token_end_of_pattern; |
| } |
| |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end); |
| case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any; |
| case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line; |
| case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line; |
| case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin; |
| case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end; |
| case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate; |
| case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin; |
| |
| case BOOST_XPR_CHAR_(char_type, '*'): |
| case BOOST_XPR_CHAR_(char_type, '+'): |
| case BOOST_XPR_CHAR_(char_type, '?'): |
| return token_invalid_quantifier; |
| |
| case BOOST_XPR_CHAR_(char_type, ']'): |
| case BOOST_XPR_CHAR_(char_type, '{'): |
| default: |
| return token_literal; |
| } |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // get_quant_spec |
| template<typename FwdIter> |
| bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec) |
| { |
| using namespace regex_constants; |
| FwdIter old_begin; |
| |
| if(this->eat_ws_(begin, end) == end) |
| { |
| return false; |
| } |
| |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, '*'): |
| spec.min_ = 0; |
| spec.max_ = (std::numeric_limits<unsigned int>::max)(); |
| break; |
| |
| case BOOST_XPR_CHAR_(char_type, '+'): |
| spec.min_ = 1; |
| spec.max_ = (std::numeric_limits<unsigned int>::max)(); |
| break; |
| |
| case BOOST_XPR_CHAR_(char_type, '?'): |
| spec.min_ = 0; |
| spec.max_ = 1; |
| break; |
| |
| case BOOST_XPR_CHAR_(char_type, '{'): |
| old_begin = this->eat_ws_(++begin, end); |
| spec.min_ = spec.max_ = detail::toi(begin, end, this->traits()); |
| BOOST_XPR_ENSURE_ |
| ( |
| begin != old_begin && begin != end, error_brace, "invalid quantifier" |
| ); |
| |
| if(*begin == BOOST_XPR_CHAR_(char_type, ',')) |
| { |
| old_begin = this->eat_ws_(++begin, end); |
| spec.max_ = detail::toi(begin, end, this->traits()); |
| BOOST_XPR_ENSURE_ |
| ( |
| begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin |
| , error_brace, "invalid quantifier" |
| ); |
| |
| if(begin == old_begin) |
| { |
| spec.max_ = (std::numeric_limits<unsigned int>::max)(); |
| } |
| else |
| { |
| BOOST_XPR_ENSURE_ |
| ( |
| spec.min_ <= spec.max_, error_badbrace, "invalid quantification range" |
| ); |
| } |
| } |
| else |
| { |
| BOOST_XPR_ENSURE_ |
| ( |
| BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier" |
| ); |
| } |
| break; |
| |
| default: |
| return false; |
| } |
| |
| spec.greedy_ = true; |
| if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin) |
| { |
| ++begin; |
| spec.greedy_ = false; |
| } |
| |
| return true; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////// |
| // get_group_type |
| template<typename FwdIter> |
| regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name) |
| { |
| using namespace regex_constants; |
| if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin) |
| { |
| this->eat_ws_(++begin, end); |
| BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); |
| |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark; |
| case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression; |
| case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment; |
| case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead; |
| case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead; |
| case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse; |
| case BOOST_XPR_CHAR_(char_type, '$'): |
| this->get_name_(++begin, end, name); |
| BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); |
| if(BOOST_XPR_CHAR_(char_type, '=') == *begin) |
| { |
| ++begin; |
| return token_rule_assign; |
| } |
| return token_rule_ref; |
| |
| case BOOST_XPR_CHAR_(char_type, '<'): |
| this->eat_ws_(++begin, end); |
| BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind; |
| case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind; |
| default: |
| BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); |
| } |
| |
| case BOOST_XPR_CHAR_(char_type, 'P'): |
| this->eat_ws_(++begin, end); |
| BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, '<'): |
| this->get_name_(++begin, end, name); |
| BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension"); |
| return token_named_mark; |
| case BOOST_XPR_CHAR_(char_type, '='): |
| this->get_name_(++begin, end, name); |
| BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); |
| return token_named_mark_ref; |
| default: |
| BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); |
| } |
| |
| case BOOST_XPR_CHAR_(char_type, 'i'): |
| case BOOST_XPR_CHAR_(char_type, 'm'): |
| case BOOST_XPR_CHAR_(char_type, 's'): |
| case BOOST_XPR_CHAR_(char_type, 'x'): |
| case BOOST_XPR_CHAR_(char_type, '-'): |
| return this->parse_mods_(begin, end); |
| |
| default: |
| BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); |
| } |
| } |
| |
| return token_literal; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| // get_charset_token |
| // NOTE: white-space is *never* ignored in a charset. |
| template<typename FwdIter> |
| regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end) |
| { |
| using namespace regex_constants; |
| BOOST_ASSERT(begin != end); |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert; |
| case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen; |
| case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end; |
| case BOOST_XPR_CHAR_(char_type, '['): |
| { |
| FwdIter next = begin; ++next; |
| if(next != end) |
| { |
| BOOST_XPR_ENSURE_( |
| *next != BOOST_XPR_CHAR_(char_type, '=') |
| , error_collate |
| , "equivalence classes are not yet supported" |
| ); |
| |
| BOOST_XPR_ENSURE_( |
| *next != BOOST_XPR_CHAR_(char_type, '.') |
| , error_collate |
| , "collation sequences are not yet supported" |
| ); |
| |
| if(*next == BOOST_XPR_CHAR_(char_type, ':')) |
| { |
| begin = ++next; |
| return token_posix_charset_begin; |
| } |
| } |
| } |
| break; |
| case BOOST_XPR_CHAR_(char_type, ':'): |
| { |
| FwdIter next = begin; ++next; |
| if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']')) |
| { |
| begin = ++next; |
| return token_posix_charset_end; |
| } |
| } |
| break; |
| case BOOST_XPR_CHAR_(char_type, '\\'): |
| if(++begin != end) |
| { |
| switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace; |
| default:; |
| } |
| } |
| return token_escape; |
| default:; |
| } |
| return token_literal; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| // get_escape_token |
| template<typename FwdIter> |
| regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end) |
| { |
| using namespace regex_constants; |
| if(begin != end) |
| { |
| switch(*begin) |
| { |
| //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell; |
| //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control; |
| //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape; |
| //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed; |
| //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline; |
| //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab; |
| //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab; |
| case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence; |
| case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary; |
| case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary; |
| case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end; |
| case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin; |
| case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence; |
| // Non-standard extension to ECMAScript syntax |
| case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin; |
| case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end; |
| default:; // fall-through |
| } |
| } |
| |
| return token_escape; |
| } |
| |
| private: |
| |
| ////////////////////////////////////////////////////////////////////////// |
| // parse_mods_ |
| template<typename FwdIter> |
| regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end) |
| { |
| using namespace regex_constants; |
| bool set = true; |
| do switch(*begin) |
| { |
| case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break; |
| case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break; |
| case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break; |
| case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break; |
| case BOOST_XPR_CHAR_(char_type, ':'): ++begin; // fall-through |
| case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark; |
| case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; // else fall-through |
| default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier")); |
| } |
| while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension")); |
| // this return is technically unreachable, but this must |
| // be here to work around a bug in gcc 4.0 |
| return token_no_mark; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // flag_ |
| void flag_(bool set, regex_constants::syntax_option_type flag) |
| { |
| this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////// |
| // is_space_ |
| bool is_space_(char_type ch) const |
| { |
| return 0 != this->space_ && this->traits().isctype(ch, this->space_); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////// |
| // is_alnum_ |
| bool is_alnum_(char_type ch) const |
| { |
| return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////// |
| // get_name_ |
| template<typename FwdIter> |
| void get_name_(FwdIter &begin, FwdIter end, string_type &name) |
| { |
| this->eat_ws_(begin, end); |
| for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin) |
| { |
| name.push_back(*begin); |
| } |
| this->eat_ws_(begin, end); |
| BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension"); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // eat_ws_ |
| template<typename FwdIter> |
| FwdIter &eat_ws_(FwdIter &begin, FwdIter end) |
| { |
| if(0 != (regex_constants::ignore_white_space & this->flags())) |
| { |
| while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin))) |
| { |
| if(BOOST_XPR_CHAR_(char_type, '#') == *begin++) |
| { |
| while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {} |
| } |
| else |
| { |
| for(; end != begin && this->is_space_(*begin); ++begin) {} |
| } |
| } |
| } |
| |
| return begin; |
| } |
| |
| regex_traits traits_; |
| regex_constants::syntax_option_type flags_; |
| typename regex_traits::char_class_type space_; |
| typename regex_traits::char_class_type alnum_; |
| }; |
| |
| }} // namespace boost::xpressive |
| |
| #endif |