| /////////////////////////////////////////////////////////////////////////////// |
| /// \file regex_token_iterator.hpp |
| /// Contains the definition of regex_token_iterator, an STL-compatible iterator |
| /// for tokenizing a string using a regular expression. |
| // |
| // Copyright 2008 Eric Niebler. Distributed under the Boost |
| // Software License, Version 1.0. (See accompanying file |
| // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| #ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 |
| #define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 |
| |
| // MS compatible compilers support #pragma once |
| #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
| # pragma once |
| #endif |
| |
| #include <vector> |
| #include <boost/assert.hpp> |
| #include <boost/mpl/assert.hpp> |
| #include <boost/type_traits/is_same.hpp> |
| #include <boost/type_traits/is_convertible.hpp> |
| #include <boost/xpressive/regex_iterator.hpp> |
| |
| namespace boost { namespace xpressive { namespace detail |
| { |
| |
| ////////////////////////////////////////////////////////////////////////// |
| // regex_token_iterator_impl |
| // |
| template<typename BidiIter> |
| struct regex_token_iterator_impl |
| : counted_base<regex_token_iterator_impl<BidiIter> > |
| { |
| typedef sub_match<BidiIter> value_type; |
| |
| regex_token_iterator_impl |
| ( |
| BidiIter begin |
| , BidiIter cur |
| , BidiIter end |
| , BidiIter next_search |
| , basic_regex<BidiIter> const &rex |
| , regex_constants::match_flag_type flags = regex_constants::match_default |
| , std::vector<int> subs = std::vector<int>(1, 0) |
| , int n = -2 |
| , bool not_null = false |
| ) |
| : iter_(begin, cur, end, next_search, rex, flags, not_null) |
| , result_() |
| , n_((-2 == n) ? (int)subs.size() - 1 : n) |
| , subs_() |
| { |
| BOOST_ASSERT(0 != subs.size()); |
| this->subs_.swap(subs); |
| } |
| |
| bool next() |
| { |
| if(-1 != this->n_) |
| { |
| BidiIter cur = this->iter_.state_.cur_; |
| if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next()) |
| { |
| this->result_ = (-1 == this->subs_[ this->n_ ]) |
| ? this->iter_.what_.prefix() |
| : this->iter_.what_[ this->subs_[ this->n_ ] ]; |
| return true; |
| } |
| else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_) |
| { |
| this->result_ = value_type(cur, this->iter_.state_.end_, true); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const |
| { |
| return this->iter_.equal_to(that.iter_) && this->n_ == that.n_; |
| } |
| |
| regex_iterator_impl<BidiIter> iter_; |
| value_type result_; |
| int n_; |
| std::vector<int> subs_; |
| }; |
| |
/// Returns the sub-match number denoted by a plain integer, i.e. the integer
/// itself. Used by to_vector() when converting arrays of sub-match indices.
inline int get_mark_number(int i)
{
    int const mark_number = i;
    return mark_number;
}
| |
/// Wraps a single sub-match index in a one-element vector.
inline std::vector<int> to_vector(int subs)
{
    std::vector<int> single;
    single.push_back(subs);
    return single;
}
| |
/// Pass-through overload: a vector of ints is already in the required form,
/// so the very same object is handed back by reference (no copy is made).
inline std::vector<int> const &to_vector(std::vector<int> const &subs)
{
    std::vector<int> const &same = subs;
    return same;
}
| |
| template<typename Int, std::size_t Size> |
| inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ]) |
| { |
| // so that people can specify sub-match indices inline with |
| // string literals, like "\1\2\3", leave off the trailing '\0' |
| std::size_t const size = Size - is_same<Int, char>::value; |
| std::vector<int> vect(size); |
| for(std::size_t i = 0; i < size; ++i) |
| { |
| vect[i] = get_mark_number(sub_matches[i]); |
| } |
| return vect; |
| } |
| |
| template<typename Int> |
| inline std::vector<int> to_vector(std::vector<Int> const &sub_matches) |
| { |
| BOOST_MPL_ASSERT((is_convertible<Int, int>)); |
| return std::vector<int>(sub_matches.begin(), sub_matches.end()); |
| } |
| |
| } // namespace detail |
| |
| ////////////////////////////////////////////////////////////////////////// |
| // regex_token_iterator |
| // |
| template<typename BidiIter> |
| struct regex_token_iterator |
| { |
| typedef basic_regex<BidiIter> regex_type; |
| typedef typename iterator_value<BidiIter>::type char_type; |
| typedef sub_match<BidiIter> value_type; |
| typedef std::ptrdiff_t difference_type; |
| typedef value_type const *pointer; |
| typedef value_type const &reference; |
| typedef std::forward_iterator_tag iterator_category; |
| |
| /// INTERNAL ONLY |
| typedef detail::regex_token_iterator_impl<BidiIter> impl_type_; |
| |
| /// \post \c *this is the end of sequence iterator. |
| regex_token_iterator() |
| : impl_() |
| { |
| } |
| |
| /// \param begin The beginning of the character range to search. |
| /// \param end The end of the character range to search. |
| /// \param rex The regex pattern to search for. |
| /// \pre \c [begin,end) is a valid range. |
| regex_token_iterator |
| ( |
| BidiIter begin |
| , BidiIter end |
| , basic_regex<BidiIter> const &rex |
| ) |
| : impl_() |
| { |
| if(0 != rex.regex_id()) |
| { |
| this->impl_ = new impl_type_(begin, begin, end, begin, rex); |
| this->next_(); |
| } |
| } |
| |
| /// \param begin The beginning of the character range to search. |
| /// \param end The end of the character range to search. |
| /// \param rex The regex pattern to search for. |
| /// \param args A let() expression with argument bindings for semantic actions. |
| /// \pre \c [begin,end) is a valid range. |
| template<typename LetExpr> |
| regex_token_iterator |
| ( |
| BidiIter begin |
| , BidiIter end |
| , basic_regex<BidiIter> const &rex |
| , detail::let_<LetExpr> const &args |
| ) |
| : impl_() |
| { |
| if(0 != rex.regex_id()) |
| { |
| this->impl_ = new impl_type_(begin, begin, end, begin, rex); |
| detail::bind_args(args, this->impl_->iter_.what_); |
| this->next_(); |
| } |
| } |
| |
| /// \param begin The beginning of the character range to search. |
| /// \param end The end of the character range to search. |
| /// \param rex The regex pattern to search for. |
| /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) |
| /// \pre \c [begin,end) is a valid range. |
| /// \pre \c subs is either an integer greater or equal to -1, |
| /// or else an array or non-empty \c std::vector\<\> of such integers. |
| template<typename Subs> |
| regex_token_iterator |
| ( |
| BidiIter begin |
| , BidiIter end |
| , basic_regex<BidiIter> const &rex |
| , Subs const &subs |
| , regex_constants::match_flag_type flags = regex_constants::match_default |
| ) |
| : impl_() |
| { |
| if(0 != rex.regex_id()) |
| { |
| this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); |
| this->next_(); |
| } |
| } |
| |
| /// \param begin The beginning of the character range to search. |
| /// \param end The end of the character range to search. |
| /// \param rex The regex pattern to search for. |
| /// \param args A let() expression with argument bindings for semantic actions. |
| /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) |
| /// \pre \c [begin,end) is a valid range. |
| /// \pre \c subs is either an integer greater or equal to -1, |
| /// or else an array or non-empty \c std::vector\<\> of such integers. |
| template<typename Subs, typename LetExpr> |
| regex_token_iterator |
| ( |
| BidiIter begin |
| , BidiIter end |
| , basic_regex<BidiIter> const &rex |
| , Subs const &subs |
| , detail::let_<LetExpr> const &args |
| , regex_constants::match_flag_type flags = regex_constants::match_default |
| ) |
| : impl_() |
| { |
| if(0 != rex.regex_id()) |
| { |
| this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); |
| detail::bind_args(args, this->impl_->iter_.what_); |
| this->next_(); |
| } |
| } |
| |
| /// \post <tt>*this == that</tt> |
| regex_token_iterator(regex_token_iterator<BidiIter> const &that) |
| : impl_(that.impl_) // COW |
| { |
| } |
| |
| /// \post <tt>*this == that</tt> |
| regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that) |
| { |
| this->impl_ = that.impl_; // COW |
| return *this; |
| } |
| |
| friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right) |
| { |
| if(!left.impl_ || !right.impl_) |
| { |
| return !left.impl_ && !right.impl_; |
| } |
| |
| return left.impl_->equal_to(*right.impl_); |
| } |
| |
| friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right) |
| { |
| return !(left == right); |
| } |
| |
| value_type const &operator *() const |
| { |
| return this->impl_->result_; |
| } |
| |
| value_type const *operator ->() const |
| { |
| return &this->impl_->result_; |
| } |
| |
| /// If N == -1 then sets *this equal to the end of sequence iterator. |
| /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to |
| /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). |
| /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is |
| /// not set in flags then sets it. Then locates the next match as if by calling |
| /// regex_search(what[0].second, end, what, *pre, flags), with the following variation: |
| /// in the event that the previous match found was of zero length (what[0].length() == 0) |
| /// then attempts to find a non-zero length match starting at what[0].second, only if that |
| /// fails and provided what[0].second != suffix().second does it look for a (possibly zero |
| /// length) match starting from what[0].second + 1. If such a match is found then sets N |
| /// equal to zero, and sets result equal to |
| /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). |
| /// Otherwise if no further matches were found, then let last_end be the endpoint of the last |
| /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and |
| /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end |
| /// of sequence iterator. |
| regex_token_iterator<BidiIter> &operator ++() |
| { |
| this->fork_(); // un-share the implementation |
| this->next_(); |
| return *this; |
| } |
| |
| regex_token_iterator<BidiIter> operator ++(int) |
| { |
| regex_token_iterator<BidiIter> tmp(*this); |
| ++*this; |
| return tmp; |
| } |
| |
| private: |
| |
| /// INTERNAL ONLY |
| void fork_() |
| { |
| if(1 != this->impl_->use_count()) |
| { |
| intrusive_ptr<impl_type_> clone = new impl_type_ |
| ( |
| this->impl_->iter_.state_.begin_ |
| , this->impl_->iter_.state_.cur_ |
| , this->impl_->iter_.state_.end_ |
| , this->impl_->iter_.state_.next_search_ |
| , this->impl_->iter_.rex_ |
| , this->impl_->iter_.flags_ |
| , this->impl_->subs_ |
| , this->impl_->n_ |
| , this->impl_->iter_.not_null_ |
| ); |
| |
| // only copy the match_results struct if we have to. Note: if the next call |
| // to impl_->next() will return false or call regex_search, we don't need to |
| // copy the match_results struct. |
| if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size())) |
| { |
| // BUGBUG This is expensive -- it causes the sequence_stack to be cleared. |
| // Find a better way |
| clone->iter_.what_ = this->impl_->iter_.what_; |
| } |
| else |
| { |
| // At the very least, copy the action args |
| detail::core_access<BidiIter>::get_action_args(clone->iter_.what_) |
| = detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_); |
| } |
| |
| this->impl_.swap(clone); |
| } |
| } |
| |
| /// INTERNAL ONLY |
| void next_() |
| { |
| BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count()); |
| if(!this->impl_->next()) |
| { |
| this->impl_ = 0; |
| } |
| } |
| |
| intrusive_ptr<impl_type_> impl_; |
| }; |
| |
| }} // namespace boost::xpressive |
| |
| #endif |