| // state_machine.hpp |
| // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) |
| // |
| // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| #ifndef BOOST_LEXER_STATE_MACHINE_HPP |
| #define BOOST_LEXER_STATE_MACHINE_HPP |
| |
| #include <algorithm> |
| #include "conversion/char_state_machine.hpp" |
| #include "consts.hpp" |
| #include <deque> |
| #include "internals.hpp" |
| #include <map> |
| #include "containers/ptr_vector.hpp" |
| #include "size_t.hpp" |
| #include <string> |
| |
| namespace boost |
| { |
| namespace lexer |
| { |
| template<typename CharT> |
| class basic_state_machine |
| { |
| public: |
| typedef CharT char_type; |
| |
| class iterator |
| { |
| public: |
| #if defined _MSC_VER && _MSC_VER <= 1200 |
| friend basic_state_machine; |
| #else |
| friend class basic_state_machine; |
| #endif |
| |
| struct data |
| { |
| // Current iterator info |
| std::size_t dfa; |
| std::size_t states; |
| std::size_t state; |
| std::size_t transitions; |
| std::size_t transition; |
| |
| // Current state info |
| bool end_state; |
| std::size_t id; |
| std::size_t unique_id; |
| std::size_t goto_dfa; |
| std::size_t bol_index; |
| std::size_t eol_index; |
| |
| // Current transition info |
| basic_string_token<CharT> token; |
| std::size_t goto_state; |
| |
| data () : |
| dfa (npos), |
| states (0), |
| state (npos), |
| transitions (0), |
| transition (npos), |
| end_state (false), |
| id (npos), |
| unique_id (npos), |
| goto_dfa (npos), |
| bol_index (npos), |
| eol_index (npos), |
| goto_state (npos) |
| { |
| } |
| |
| bool operator == (const data &rhs_) const |
| { |
| return dfa == rhs_.dfa && |
| states == rhs_.states && |
| state == rhs_.state && |
| transitions == rhs_.transitions && |
| transition == rhs_.transition && |
| end_state == rhs_.end_state && |
| id == rhs_.id && |
| unique_id == rhs_.unique_id && |
| goto_dfa == rhs_.goto_dfa && |
| bol_index == rhs_.bol_index && |
| eol_index == rhs_.eol_index && |
| token == rhs_.token && |
| transition == rhs_.transition; |
| } |
| }; |
| |
| iterator () : |
| _sm (0), |
| _dfas (0), |
| _dfa (npos), |
| _states (0), |
| _state (npos), |
| _transitions (0), |
| _transition (npos) |
| { |
| } |
| |
| bool operator == (const iterator &rhs_) const |
| { |
| return _dfas == rhs_._dfas && _dfa == rhs_._dfa && |
| _states == rhs_._states && _state == rhs_._state && |
| _transitions == rhs_._transitions && |
| _transition == rhs_._transition; |
| } |
| |
| bool operator != (const iterator &rhs_) const |
| { |
| return !(*this == rhs_); |
| } |
| |
| data &operator * () |
| { |
| return _data; |
| } |
| |
| data *operator -> () |
| { |
| return &_data; |
| } |
| |
| // Let compiler generate operator = (). |
| |
| // prefix version |
| iterator &operator ++ () |
| { |
| next (); |
| return *this; |
| } |
| |
| // postfix version |
| iterator operator ++ (int) |
| { |
| iterator iter_ = *this; |
| |
| next (); |
| return iter_; |
| } |
| |
| void clear () |
| { |
| _dfas = _states = _transitions = 0; |
| _dfa = _state = _transition = npos; |
| } |
| |
| private: |
| basic_state_machine *_sm; |
| data _data; |
| std::size_t _dfas; |
| std::size_t _dfa; |
| std::size_t _states; |
| std::size_t _state; |
| std::size_t _transitions; |
| std::size_t _transition; |
| typename detail::basic_char_state_machine<CharT>::state:: |
| size_t_string_token_map::const_iterator _token_iter; |
| typename detail::basic_char_state_machine<CharT>::state:: |
| size_t_string_token_map::const_iterator _token_end; |
| |
| void next () |
| { |
| bool reset_state_ = false; |
| |
| if (_transition >= _transitions) |
| { |
| _transition = _data.transition = 0; |
| _data.state = ++_state; |
| reset_state_ = true; |
| |
| if (_state >= _states) |
| { |
| ++_dfa; |
| |
| if (_dfa >= _dfas) |
| { |
| clear (); |
| reset_state_ = false; |
| } |
| else |
| { |
| _states = _data.states = |
| _sm->_csm._sm_vector[_dfa].size (); |
| _state = _data.state = 0; |
| } |
| } |
| } |
| else |
| { |
| _data.transition = _transition; |
| } |
| |
| if (reset_state_) |
| { |
| const typename detail::basic_char_state_machine<CharT>:: |
| state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state]; |
| |
| _transitions = _data.transitions = ptr_->_transitions.size (); |
| _data.end_state = ptr_->_end_state; |
| _data.id = ptr_->_id; |
| _data.unique_id = ptr_->_unique_id; |
| _data.goto_dfa = ptr_->_state; |
| _data.bol_index = ptr_->_bol_index; |
| _data.eol_index = ptr_->_eol_index; |
| _token_iter = ptr_->_transitions.begin (); |
| _token_end = ptr_->_transitions.end (); |
| } |
| |
| if (_token_iter != _token_end) |
| { |
| _data.token = _token_iter->second; |
| _data.goto_state = _token_iter->first; |
| ++_token_iter; |
| ++_transition; |
| } |
| else |
| { |
| _data.token.clear (); |
| _data.goto_state = npos; |
| } |
| } |
| }; |
| |
| #if defined _MSC_VER && _MSC_VER <= 1200 |
| friend iterator; |
| #else |
| friend class iterator; |
| #endif |
| |
| basic_state_machine () |
| { |
| } |
| |
| void clear () |
| { |
| _internals.clear (); |
| _csm.clear (); |
| } |
| |
| bool empty () const |
| { |
| // Don't include _csm in this test, as irrelevant to state. |
| return _internals._lookup->empty () && |
| _internals._dfa_alphabet.empty () && |
| _internals._dfa->empty (); |
| } |
| |
| std::size_t size () const |
| { |
| return _internals._dfa->size (); |
| } |
| |
| bool operator == (const basic_state_machine &rhs_) const |
| { |
| // Don't include _csm in this test, as irrelevant to state. |
| return _internals._lookup == rhs_._internals._lookup && |
| _internals._dfa_alphabet == rhs_._internals._dfa_alphabet && |
| _internals._dfa == rhs_._internals._dfa && |
| _internals._seen_BOL_assertion == |
| rhs_._internals._seen_BOL_assertion && |
| _internals._seen_EOL_assertion == |
| rhs_._internals._seen_EOL_assertion; |
| } |
| |
| iterator begin () const |
| { |
| iterator iter_; |
| |
| iter_._sm = const_cast<basic_state_machine *>(this); |
| check_for_csm (); |
| |
| if (!_csm.empty ()) |
| { |
| const typename detail::basic_char_state_machine<CharT>:: |
| state_vector *ptr_ = &_csm._sm_vector.front (); |
| |
| iter_._dfas = _csm._sm_vector.size (); |
| iter_._states = iter_._data.states = ptr_->size (); |
| iter_._transitions = iter_._data.transitions = |
| ptr_->front ()._transitions.size (); |
| iter_._dfa = iter_._data.dfa = 0; |
| iter_._state = iter_._data.state = 0; |
| iter_._transition = 0; |
| iter_._data.end_state = ptr_->front ()._end_state; |
| iter_._data.id = ptr_->front ()._id; |
| iter_._data.unique_id = ptr_->front ()._unique_id; |
| iter_._data.goto_dfa = ptr_->front ()._state; |
| iter_._data.bol_index = ptr_->front ()._bol_index; |
| iter_._data.eol_index = ptr_->front ()._eol_index; |
| iter_._token_iter = ptr_->front ()._transitions.begin (); |
| iter_._token_end = ptr_->front ()._transitions.end (); |
| |
| // Deal with case where there is only a bol or eol |
| // but no other transitions. |
| if (iter_._transitions) |
| { |
| ++iter_; |
| } |
| } |
| |
| return iter_; |
| } |
| |
| iterator end () const |
| { |
| iterator iter_; |
| |
| iter_._sm = const_cast<basic_state_machine *>(this); |
| return iter_; |
| } |
| |
| void swap (basic_state_machine &sm_) |
| { |
| _internals.swap (sm_._internals); |
| _csm.swap (sm_._csm); |
| } |
| |
| const detail::internals &data () const |
| { |
| return _internals; |
| } |
| |
| private: |
| detail::internals _internals; |
| mutable detail::basic_char_state_machine<CharT> _csm; |
| |
| void check_for_csm () const |
| { |
| if (_csm.empty ()) |
| { |
| human_readable (_csm); |
| } |
| } |
| |
| void human_readable (detail::basic_char_state_machine<CharT> &sm_) const |
| { |
| const std::size_t max_ = sizeof (CharT) == 1 ? |
| num_chars : num_wchar_ts; |
| const std::size_t start_states_ = _internals._dfa->size (); |
| |
| sm_.clear (); |
| sm_._sm_vector.resize (start_states_); |
| |
| for (std::size_t start_state_index_ = 0; |
| start_state_index_ < start_states_; ++start_state_index_) |
| { |
| const detail::internals::size_t_vector *lu_ = |
| _internals._lookup[start_state_index_]; |
| const std::size_t alphabet_ = |
| _internals._dfa_alphabet[start_state_index_] - dfa_offset; |
| std::vector<std::basic_string<CharT> > chars_ (alphabet_); |
| const std::size_t states_ = _internals._dfa[start_state_index_]-> |
| size () / (alphabet_ + dfa_offset); |
| const std::size_t *read_ptr_ = &_internals. |
| _dfa[start_state_index_]->front () + alphabet_ + dfa_offset; |
| |
| sm_._sm_vector[start_state_index_].resize (states_ - 1); |
| |
| for (std::size_t alpha_index_ = 0; alpha_index_ < max_; |
| ++alpha_index_) |
| { |
| const std::size_t col_ = lu_->at (alpha_index_); |
| |
| if (col_ != dead_state_index) |
| { |
| chars_[col_ - dfa_offset] += static_cast<CharT> |
| (alpha_index_); |
| } |
| } |
| |
| for (std::size_t state_index_ = 1; state_index_ < states_; |
| ++state_index_) |
| { |
| typename detail::basic_char_state_machine<CharT>::state |
| *state_ = &sm_._sm_vector[start_state_index_] |
| [state_index_ - 1]; |
| |
| state_->_end_state = *read_ptr_ != 0; |
| state_->_id = *(read_ptr_ + id_index); |
| state_->_unique_id = *(read_ptr_ + unique_id_index); |
| state_->_state = *(read_ptr_ + state_index); |
| state_->_bol_index = *(read_ptr_ + bol_index) - 1; |
| state_->_eol_index = *(read_ptr_ + eol_index) - 1; |
| read_ptr_ += dfa_offset; |
| |
| for (std::size_t col_index_ = 0; col_index_ < alphabet_; |
| ++col_index_, ++read_ptr_) |
| { |
| const std::size_t transition_ = *read_ptr_; |
| |
| if (transition_ != 0) |
| { |
| const std::size_t i_ = transition_ - 1; |
| typename detail::basic_char_state_machine<CharT>:: |
| state::size_t_string_token_map::iterator iter_ = |
| state_->_transitions.find (i_); |
| |
| if (iter_ == state_->_transitions.end ()) |
| { |
| basic_string_token<CharT> token_ |
| (false, chars_[col_index_]); |
| typename detail::basic_char_state_machine<CharT>:: |
| state::size_t_string_token_pair pair_ |
| (i_, token_); |
| |
| state_->_transitions.insert (pair_); |
| } |
| else |
| { |
| iter_->second._charset += chars_[col_index_]; |
| } |
| } |
| } |
| |
| for (typename detail::basic_char_state_machine<CharT>::state:: |
| size_t_string_token_map::iterator iter_ = |
| state_->_transitions.begin (), |
| end_ = state_->_transitions.end (); |
| iter_ != end_; ++iter_) |
| { |
| std::sort (iter_->second._charset.begin (), |
| iter_->second._charset.end ()); |
| iter_->second.normalise (); |
| } |
| } |
| } |
| } |
| }; |
| |
| typedef basic_state_machine<char> state_machine; |
| typedef basic_state_machine<wchar_t> wstate_machine; |
| } |
| } |
| |
| #endif |