| // generator.hpp |
| // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) |
| // |
| // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| #ifndef BOOST_LEXER_GENERATOR_HPP |
| #define BOOST_LEXER_GENERATOR_HPP |
| |
| #include "char_traits.hpp" |
| // memcmp() |
| #include <cstring> |
| #include "partition/charset.hpp" |
| #include "partition/equivset.hpp" |
| #include <memory> |
| #include "parser/tree/node.hpp" |
| #include "parser/parser.hpp" |
| #include "containers/ptr_list.hpp" |
| #include "rules.hpp" |
| #include "state_machine.hpp" |
| |
| namespace boost |
| { |
| namespace lexer |
| { |
// Compiles a basic_rules<CharT> rule set into the DFAs held by a
// basic_state_machine<CharT>, and optionally minimises those DFAs.
// All members are static; the class is a namespace-like bundle keyed on
// the character type.  Traits supplies index_type, used to convert a
// (possibly signed) CharT into a lookup-table subscript.
template<typename CharT, typename Traits = char_traits<CharT> >
class basic_generator
{
public:
    typedef typename detail::internals::size_t_vector size_t_vector;
    typedef basic_rules<CharT> rules;

    // Builds one lookup table, one alphabet size and one DFA per lexer
    // state in rules_, storing them in state_machine_.  data() is exposed
    // const by the state machine, so the generator — the one component
    // allowed to populate it — casts the constness away.
    static void build (const rules &rules_,
        basic_state_machine<CharT> &state_machine_)
    {
        std::size_t index_ = 0;
        std::size_t size_ = rules_.statemap ().size ();
        node_ptr_vector node_ptr_vector_;
        detail::internals &internals_ = const_cast<detail::internals &>
            (state_machine_.data ());
        bool seen_BOL_assertion_ = false;
        bool seen_EOL_assertion_ = false;

        state_machine_.clear ();

        // Pre-create the per-lexer-state containers.  Each push_back(0)
        // followed by an assignment to back() keeps the ptr container
        // consistent if the subsequent 'new' throws.
        for (; index_ < size_; ++index_)
        {
            internals_._lookup->push_back (static_cast<size_t_vector *>(0));
            internals_._lookup->back () = new size_t_vector;
            internals_._dfa_alphabet.push_back (0);
            internals_._dfa->push_back (static_cast<size_t_vector *>(0));
            internals_._dfa->back () = new size_t_vector;
        }

        for (index_ = 0, size_ = internals_._lookup->size ();
            index_ < size_; ++index_)
        {
            // Every input character initially maps to the dead state.
            internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
                num_chars : num_wchar_ts, dead_state_index);

            if (!rules_.regexes ()[index_].empty ())
            {
                // vector mapping token indexes to partitioned token index sets
                index_set_vector set_mapping_;
                // syntax tree
                detail::node *root_ = build_tree (rules_, index_,
                    node_ptr_vector_, internals_, set_mapping_);

                build_dfa (root_, set_mapping_,
                    internals_._dfa_alphabet[index_],
                    *internals_._dfa[index_]);

                // Remember whether ANY lexer state used a ^ or $ assertion;
                // the flags are reset per state below and restored once
                // after the loop.
                if (internals_._seen_BOL_assertion)
                {
                    seen_BOL_assertion_ = true;
                }

                if (internals_._seen_EOL_assertion)
                {
                    seen_EOL_assertion_ = true;
                }

                internals_._seen_BOL_assertion = false;
                internals_._seen_EOL_assertion = false;
            }
        }

        internals_._seen_BOL_assertion = seen_BOL_assertion_;
        internals_._seen_EOL_assertion = seen_EOL_assertion_;
    }

    // Merges equivalent states in every per-lexer-state DFA.  Each DFA is
    // re-minimised until a fixed point is reached (its size stops
    // shrinking), since merging two states can make further pairs equal.
    static void minimise (basic_state_machine<CharT> &state_machine_)
    {
        detail::internals &internals_ = const_cast<detail::internals &>
            (state_machine_.data ());
        const std::size_t machines_ = internals_._dfa->size ();

        for (std::size_t i_ = 0; i_ < machines_; ++i_)
        {
            const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
            size_t_vector *dfa_ = internals_._dfa[i_];

            // dfa_alphabet_ == 0 means this lexer state had no regexes.
            if (dfa_alphabet_ != 0)
            {
                std::size_t size_ = 0;

                do
                {
                    size_ = dfa_->size ();
                    minimise_dfa (dfa_alphabet_, *dfa_, size_);
                } while (dfa_->size () != size_);
            }
        }
    }

protected:
    typedef detail::basic_charset<CharT> charset;
    typedef detail::ptr_list<charset> charset_list;
    // NOTE(review): std::auto_ptr copies TRANSFER ownership (C++03
    // semantics); the partitioning code below relies on that.  auto_ptr is
    // deprecated in C++11 and removed in C++17 — migrating to
    // std::unique_ptr would require std::move at each transfer site.
    typedef std::auto_ptr<charset> charset_ptr;
    typedef detail::equivset equivset;
    typedef detail::ptr_list<equivset> equivset_list;
    typedef std::auto_ptr<equivset> equivset_ptr;
    typedef typename charset::index_set index_set;
    typedef std::vector<index_set> index_set_vector;
    typedef detail::basic_parser<CharT> parser;
    typedef typename parser::node_ptr_vector node_ptr_vector;
    typedef std::set<const detail::node *> node_set;
    typedef detail::ptr_vector<node_set> node_set_vector;
    typedef std::vector<const detail::node *> node_vector;
    typedef detail::ptr_vector<node_vector> node_vector_vector;
    typedef typename parser::string string;
    typedef std::pair<string, string> string_pair;
    typedef typename parser::tokeniser::string_token string_token;
    typedef std::deque<string_pair> macro_deque;
    typedef std::pair<string, const detail::node *> macro_pair;
    typedef typename parser::macro_map::iterator macro_iter;
    typedef std::pair<macro_iter, bool> macro_iter_pair;
    typedef typename parser::tokeniser::token_map token_map;

    // Parses every regex of lexer state state_ into a syntax tree, joins
    // the trees with selection ('|') nodes, partitions the charset tokens
    // into disjoint sets, fills the character lookup table for this state
    // and records the resulting DFA alphabet size.  Returns the root of
    // the combined tree; set_mapping_ receives, per original charset
    // token, the set of partition indexes it was split into.
    static detail::node *build_tree (const rules &rules_,
        const std::size_t state_, node_ptr_vector &node_ptr_vector_,
        detail::internals &internals_, index_set_vector &set_mapping_)
    {
        size_t_vector *lookup_ = internals_._lookup[state_];
        const typename rules::string_deque_deque &regexes_ =
            rules_.regexes ();
        const typename rules::id_vector_deque &ids_ = rules_.ids ();
        const typename rules::id_vector_deque &unique_ids_ =
            rules_.unique_ids ();
        const typename rules::id_vector_deque &states_ = rules_.states ();
        typename rules::string_deque::const_iterator regex_iter_ =
            regexes_[state_].begin ();
        typename rules::string_deque::const_iterator regex_iter_end_ =
            regexes_[state_].end ();
        typename rules::id_vector::const_iterator ids_iter_ =
            ids_[state_].begin ();
        typename rules::id_vector::const_iterator unique_ids_iter_ =
            unique_ids_[state_].begin ();
        typename rules::id_vector::const_iterator states_iter_ =
            states_[state_].begin ();
        const typename rules::string &regex_ = *regex_iter_;
        // map of regex charset tokens (strings) to index
        token_map token_map_;
        const typename rules::string_pair_deque &macrodeque_ =
            rules_.macrodeque ();
        typename parser::macro_map macromap_;
        typename detail::node::node_vector tree_vector_;

        // Macros must be parsed first so regexes can reference them.
        build_macros (token_map_, macrodeque_, macromap_,
            rules_.flags (), rules_.locale (), node_ptr_vector_,
            internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);

        // Parse the first regex outside the loop (the loop below assumes
        // at least one tree already exists).
        detail::node *root_ = parser::parse (regex_.c_str (),
            regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
            *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
            macromap_, token_map_, internals_._seen_BOL_assertion,
            internals_._seen_EOL_assertion);

        ++regex_iter_;
        ++ids_iter_;
        ++unique_ids_iter_;
        ++states_iter_;
        tree_vector_.push_back (root_);

        // build syntax trees
        while (regex_iter_ != regex_iter_end_)
        {
            // re-declare var, otherwise we perform an assignment..!
            const typename rules::string &regex_ = *regex_iter_;

            root_ = parser::parse (regex_.c_str (),
                regex_.c_str () + regex_.size (), *ids_iter_,
                *unique_ids_iter_, *states_iter_, rules_.flags (),
                rules_.locale (), node_ptr_vector_, macromap_, token_map_,
                internals_._seen_BOL_assertion,
                internals_._seen_EOL_assertion);
            tree_vector_.push_back (root_);
            ++regex_iter_;
            ++ids_iter_;
            ++unique_ids_iter_;
            ++states_iter_;
        }

        if (internals_._seen_BOL_assertion)
        {
            // Fixup BOLs
            // If any regex is anchored with '^', the unanchored ones must
            // be given an optional BOL prefix so all trees share the same
            // alphabet of assertion tokens.
            typename detail::node::node_vector::iterator iter_ =
                tree_vector_.begin ();
            typename detail::node::node_vector::iterator end_ =
                tree_vector_.end ();

            for (; iter_ != end_; ++iter_)
            {
                fixup_bol (*iter_, node_ptr_vector_);
            }
        }

        // join trees
        // Combine all per-regex trees into one tree with selection ('|')
        // nodes: regex1 | regex2 | ...
        {
            typename detail::node::node_vector::iterator iter_ =
                tree_vector_.begin ();
            typename detail::node::node_vector::iterator end_ =
                tree_vector_.end ();

            if (iter_ != end_)
            {
                root_ = *iter_;
                ++iter_;
            }

            for (; iter_ != end_; ++iter_)
            {
                node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
                node_ptr_vector_->back () = new detail::selection_node
                    (root_, *iter_);
                root_ = node_ptr_vector_->back ();
            }
        }

        // partitioned token list
        charset_list token_list_;

        set_mapping_.resize (token_map_.size ());
        partition_tokens (token_map_, token_list_);

        typename charset_list::list::const_iterator iter_ =
            token_list_->begin ();
        typename charset_list::list::const_iterator end_ =
            token_list_->end ();
        std::size_t index_ = 0;

        // For each disjoint charset partition: point its characters at
        // column index_ of the DFA rows, and record which partitions each
        // original token was split into.
        for (; iter_ != end_; ++iter_, ++index_)
        {
            const charset *cs_ = *iter_;
            typename charset::index_set::const_iterator set_iter_ =
                cs_->_index_set.begin ();
            typename charset::index_set::const_iterator set_end_ =
                cs_->_index_set.end ();

            fill_lookup (cs_->_token, lookup_, index_);

            for (; set_iter_ != set_end_; ++set_iter_)
            {
                set_mapping_[*set_iter_].insert (index_);
            }
        }

        // dfa_offset reserves the leading per-row bookkeeping cells
        // (end-state flag, ids, BOL/EOL transitions).
        internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
        return root_;
    }

    // Parses each (name, regex) macro definition and stores its syntax
    // tree in macromap_ so later regexes can expand it by name.
    static void build_macros (token_map &token_map_,
        const macro_deque &macrodeque_,
        typename parser::macro_map &macromap_, const regex_flags flags_,
        const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
        bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
    {
        for (typename macro_deque::const_iterator iter_ =
            macrodeque_.begin (), end_ = macrodeque_.end ();
            iter_ != end_; ++iter_)
        {
            const typename rules::string &name_ = iter_->first;
            const typename rules::string &regex_ = iter_->second;
            // Macros carry no rule ids/states of their own (0, 0, 0).
            detail::node *node_ = parser::parse (regex_.c_str (),
                regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
                locale_, node_ptr_vector_, macromap_, token_map_,
                seen_BOL_assertion_, seen_EOL_assertion_);
            macro_iter_pair map_iter_ = macromap_.
                insert (macro_pair (name_, static_cast<const detail::node *>
                (0)));

            map_iter_.first->second = node_;
        }
    }

    // Subset construction: converts the syntax tree rooted at root_ into
    // a DFA laid out as consecutive rows of dfa_alphabet_ size_t cells in
    // dfa_.  Row 0 is the 'jam' (dead) state; closure() appends one row
    // per distinct followpos set.
    static void build_dfa (detail::node *root_,
        const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
        size_t_vector &dfa_)
    {
        typename detail::node::node_vector *followpos_ =
            &root_->firstpos ();
        node_set_vector seen_sets_;
        node_vector_vector seen_vectors_;
        size_t_vector hash_vector_;

        // 'jam' state
        dfa_.resize (dfa_alphabet_, 0);
        // Seed with the start state (firstpos of the root).
        closure (followpos_, seen_sets_, seen_vectors_,
            hash_vector_, dfa_alphabet_, dfa_);

        std::size_t *ptr_ = 0;

        // seen_vectors_ grows as closure() discovers states, so this loop
        // re-reads size() each iteration — a worklist in disguise.
        for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
        {
            equivset_list equiv_list_;

            build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);

            for (typename equivset_list::list::const_iterator iter_ =
                equiv_list_->begin (), end_ = equiv_list_->end ();
                iter_ != end_; ++iter_)
            {
                equivset *equivset_ = *iter_;
                const std::size_t transition_ = closure
                    (&equivset_->_followpos, seen_sets_, seen_vectors_,
                    hash_vector_, dfa_alphabet_, dfa_);

                if (transition_ != npos)
                {
                    // Row of the current state; seen_vectors_ index 0 is
                    // DFA state 1 (state 0 is the jam state).
                    ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);

                    // Prune abstemious transitions from end states.
                    // (*ptr_ is the row's end-state flag cell.)
                    if (*ptr_ && !equivset_->_greedy) continue;

                    for (typename detail::equivset::index_vector::const_iterator
                        equiv_iter_ = equivset_->_index_vector.begin (),
                        equiv_end_ = equivset_->_index_vector.end ();
                        equiv_iter_ != equiv_end_; ++equiv_iter_)
                    {
                        // Deliberately shadows the outer index_.
                        const std::size_t index_ = *equiv_iter_;

                        // BOL and EOL assertions get dedicated cells; a
                        // cell already claimed by the other assertion is
                        // left alone.
                        if (index_ == bol_token)
                        {
                            if (ptr_[eol_index] == 0)
                            {
                                ptr_[bol_index] = transition_;
                            }
                        }
                        else if (index_ == eol_token)
                        {
                            if (ptr_[bol_index] == 0)
                            {
                                ptr_[eol_index] = transition_;
                            }
                        }
                        else
                        {
                            ptr_[index_ + dfa_offset] = transition_;
                        }
                    }
                }
            }
        }
    }

    // Returns the DFA state index for the given followpos node set,
    // appending a new row to dfa_ if the set has not been seen before.
    // Returns npos for an empty set.  seen_sets_ / seen_vectors_ /
    // hash_vector_ are parallel containers: the set form (for equality
    // tests), the vector form (preserving insertion order) and a cheap
    // hash used to skip most full-set comparisons.
    static std::size_t closure (typename detail::node::node_vector *followpos_,
        node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
        size_t_vector &hash_vector_, const std::size_t size_,
        size_t_vector &dfa_)
    {
        bool end_state_ = false;
        std::size_t id_ = 0;
        std::size_t unique_id_ = npos;
        std::size_t state_ = 0;
        std::size_t hash_ = 0;

        if (followpos_->empty ()) return npos;

        std::size_t index_ = 0;
        std::auto_ptr<node_set> set_ptr_ (new node_set);
        std::auto_ptr<node_vector> vector_ptr_ (new node_vector);

        for (typename detail::node::node_vector::const_iterator iter_ =
            followpos_->begin (), end_ = followpos_->end ();
            iter_ != end_; ++iter_)
        {
            closure_ex (*iter_, end_state_, id_, unique_id_, state_,
                set_ptr_.get (), vector_ptr_.get (), hash_);
        }

        // Linear scan for an existing identical state; hash mismatch
        // short-circuits the (expensive) set comparison.
        bool found_ = false;
        typename size_t_vector::const_iterator hash_iter_ =
            hash_vector_.begin ();
        typename size_t_vector::const_iterator hash_end_ =
            hash_vector_.end ();
        typename node_set_vector::vector::const_iterator set_iter_ =
            seen_sets_->begin ();

        for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
        {
            found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
            // index_ is pre-incremented so that a hit at position i yields
            // i + 1 — consistent with state 0 being the jam state.
            ++index_;

            if (found_) break;
        }

        if (!found_)
        {
            seen_sets_->push_back (static_cast<node_set *>(0));
            seen_sets_->back () = set_ptr_.release ();
            seen_vectors_->push_back (static_cast<node_vector *>(0));
            seen_vectors_->back () = vector_ptr_.release ();
            hash_vector_.push_back (hash_);
            // State 0 is the jam state...
            index_ = seen_sets_->size ();

            const std::size_t old_size_ = dfa_.size ();

            // New zero-filled row; fill in the bookkeeping cells if any
            // node in the set is an accepting (end-state) node.
            dfa_.resize (old_size_ + size_, 0);

            if (end_state_)
            {
                dfa_[old_size_] |= end_state;
                dfa_[old_size_ + id_index] = id_;
                dfa_[old_size_ + unique_id_index] = unique_id_;
                dfa_[old_size_ + state_index] = state_;
            }
        }

        return index_;
    }

    // Adds one node to the state being built: inserts it into the set,
    // appends it to the ordered vector on first sight, and folds its
    // address into the hash.  Only the FIRST end-state node encountered
    // supplies the accept id/unique id/next lexer state.
    static void closure_ex (detail::node *node_, bool &end_state_,
        std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
        node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
    {
        const bool temp_end_state_ = node_->end_state ();

        if (temp_end_state_)
        {
            if (!end_state_)
            {
                end_state_ = true;
                id_ = node_->id ();
                unique_id_ = node_->unique_id ();
                state_ = node_->lexer_state ();
            }
        }

        if (set_ptr_->insert (node_).second)
        {
            vector_ptr_->push_back (node_);
            // Hash is the sum of node addresses; assumes a pointer fits
            // in std::size_t (true on common platforms).
            hash_ += reinterpret_cast<std::size_t> (node_);
        }
    }

    // Refines the charsets from map_ into a list of pairwise-disjoint
    // charsets (lhs_).  Each rhs_ charset is intersected against every
    // lhs_ entry; overlaps are split out as new entries.  Relies on
    // auto_ptr copy-assignment transferring ownership.
    static void partition_tokens (const token_map &map_,
        charset_list &lhs_)
    {
        charset_list rhs_;

        fill_rhs_list (map_, rhs_);

        if (!rhs_->empty ())
        {
            typename charset_list::list::iterator iter_;
            typename charset_list::list::iterator end_;
            charset_ptr overlap_ (new charset);

            lhs_->push_back (static_cast<charset *>(0));
            lhs_->back () = rhs_->front ();
            rhs_->pop_front ();

            while (!rhs_->empty ())
            {
                charset_ptr r_ (rhs_->front ());

                rhs_->pop_front ();
                iter_ = lhs_->begin ();
                end_ = lhs_->end ();

                while (!r_->empty () && iter_ != end_)
                {
                    typename charset_list::list::iterator l_iter_ = iter_;

                    // Removes the overlap from both *l_iter_ and r_,
                    // depositing it in overlap_.
                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());

                    if (overlap_->empty ())
                    {
                        // Disjoint: nothing to split.
                        ++iter_;
                    }
                    else if ((*l_iter_)->empty ())
                    {
                        // l was a subset of r: replace l with the overlap.
                        delete *l_iter_;
                        *l_iter_ = overlap_.release ();

                        // VC++ 6 Hack:
                        charset_ptr temp_overlap_ (new charset);

                        overlap_ = temp_overlap_;
                        ++iter_;
                    }
                    else if (r_->empty ())
                    {
                        // r was a subset of l: keep processing the overlap
                        // as the remainder of r.
                        delete r_.release ();
                        r_ = overlap_;

                        // VC++ 6 Hack:
                        charset_ptr temp_overlap_ (new charset);

                        overlap_ = temp_overlap_;
                        break;
                    }
                    else
                    {
                        // Partial overlap: insert it as a new lhs entry.
                        iter_ = lhs_->insert (++iter_,
                            static_cast<charset *>(0));
                        *iter_ = overlap_.release ();

                        // VC++ 6 Hack:
                        charset_ptr temp_overlap_ (new charset);

                        overlap_ = temp_overlap_;
                        ++iter_;
                        end_ = lhs_->end ();
                    }
                }

                // Whatever is left of r_ overlaps nothing: new partition.
                if (!r_->empty ())
                {
                    lhs_->push_back (static_cast<charset *>(0));
                    lhs_->back () = r_.release ();
                }
            }
        }
    }

    // Wraps every (token string, index) pair from map_ in a charset and
    // appends it to list_.
    static void fill_rhs_list (const token_map &map_,
        charset_list &list_)
    {
        typename parser::tokeniser::token_map::const_iterator iter_ =
            map_.begin ();
        typename parser::tokeniser::token_map::const_iterator end_ =
            map_.end ();

        for (; iter_ != end_; ++iter_)
        {
            list_->push_back (static_cast<charset *>(0));
            list_->back () = new charset (iter_->first, iter_->second);
        }
    }

    // Points every character of token_ at DFA column index_ + dfa_offset
    // in the per-state lookup table.  For a negated charset the complement
    // of the stored characters is filled instead.
    static void fill_lookup (const string_token &token_,
        size_t_vector *lookup_, const std::size_t index_)
    {
        const CharT *curr_ = token_._charset.c_str ();
        const CharT *chars_end_ = curr_ + token_._charset.size ();
        std::size_t *ptr_ = &lookup_->front ();
        const std::size_t max_ = sizeof (CharT) == 1 ?
            num_chars : num_wchar_ts;

        if (token_._negated)
        {
            // Walk the full CharT range, skipping the listed characters.
            // NOTE(review): assumes token_._charset is sorted ascending
            // and that 1-byte CharT is signed with minimum -128 — TODO
            // confirm against string_token's invariants.
            CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0;
            std::size_t i_ = 0;

            while (curr_ < chars_end_)
            {
                // Fill the gap below the next listed character.
                while (*curr_ > curr_char_)
                {
                    ptr_[static_cast<typename Traits::index_type>
                        (curr_char_)] = index_ + dfa_offset;
                    ++curr_char_;
                    ++i_;
                }

                // Skip the listed character itself.
                ++curr_char_;
                ++curr_;
                ++i_;
            }

            // Fill the tail above the last listed character.
            for (; i_ < max_; ++i_)
            {
                ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
                    index_ + dfa_offset;
                ++curr_char_;
            }
        }
        else
        {
            while (curr_ < chars_end_)
            {
                ptr_[static_cast<typename Traits::index_type>(*curr_)] =
                    index_ + dfa_offset;
                ++curr_;
            }
        }
    }

    // Equivset analogue of partition_tokens: refines the equivsets built
    // from vector_ into pairwise-disjoint entries in lhs_.  Identical
    // split logic; see partition_tokens for the case-by-case commentary.
    static void build_equiv_list (const node_vector *vector_,
        const index_set_vector &set_mapping_, equivset_list &lhs_)
    {
        equivset_list rhs_;

        fill_rhs_list (vector_, set_mapping_, rhs_);

        if (!rhs_->empty ())
        {
            typename equivset_list::list::iterator iter_;
            typename equivset_list::list::iterator end_;
            equivset_ptr overlap_ (new equivset);

            lhs_->push_back (static_cast<equivset *>(0));
            lhs_->back () = rhs_->front ();
            rhs_->pop_front ();

            while (!rhs_->empty ())
            {
                equivset_ptr r_ (rhs_->front ());

                rhs_->pop_front ();
                iter_ = lhs_->begin ();
                end_ = lhs_->end ();

                while (!r_->empty () && iter_ != end_)
                {
                    typename equivset_list::list::iterator l_iter_ = iter_;

                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());

                    if (overlap_->empty ())
                    {
                        ++iter_;
                    }
                    else if ((*l_iter_)->empty ())
                    {
                        delete *l_iter_;
                        *l_iter_ = overlap_.release ();

                        // VC++ 6 Hack:
                        equivset_ptr temp_overlap_ (new equivset);

                        overlap_ = temp_overlap_;
                        ++iter_;
                    }
                    else if (r_->empty ())
                    {
                        delete r_.release ();
                        r_ = overlap_;

                        // VC++ 6 Hack:
                        equivset_ptr temp_overlap_ (new equivset);

                        overlap_ = temp_overlap_;
                        break;
                    }
                    else
                    {
                        iter_ = lhs_->insert (++iter_,
                            static_cast<equivset *>(0));
                        *iter_ = overlap_.release ();

                        // VC++ 6 Hack:
                        equivset_ptr temp_overlap_ (new equivset);

                        overlap_ = temp_overlap_;
                        ++iter_;
                        end_ = lhs_->end ();
                    }
                }

                if (!r_->empty ())
                {
                    lhs_->push_back (static_cast<equivset *>(0));
                    lhs_->back () = r_.release ();
                }
            }
        }
    }

    // Builds one equivset per non-end-state node of a DFA state: BOL/EOL
    // assertion tokens keep their own singleton index set, ordinary
    // charset tokens use the partition indexes recorded in set_mapping_.
    static void fill_rhs_list (const node_vector *vector_,
        const index_set_vector &set_mapping_, equivset_list &list_)
    {
        typename node_vector::const_iterator iter_ =
            vector_->begin ();
        typename node_vector::const_iterator end_ =
            vector_->end ();

        for (; iter_ != end_; ++iter_)
        {
            const detail::node *node_ = *iter_;

            if (!node_->end_state ())
            {
                const std::size_t token_ = node_->token ();

                if (token_ != null_token)
                {
                    list_->push_back (static_cast<equivset *>(0));

                    if (token_ == bol_token || token_ == eol_token)
                    {
                        std::set<std::size_t> index_set_;

                        index_set_.insert (token_);
                        list_->back () = new equivset (index_set_,
                            node_->greedy (), token_, node_->followpos ());
                    }
                    else
                    {
                        list_->back () = new equivset (set_mapping_[token_],
                            node_->greedy (), token_, node_->followpos ());
                    }
                }
            }
        }
    }

    // Gives an unanchored regex tree an optional BOL prefix: if firstpos
    // contains no bol_token leaf, root_ is rewritten to the equivalent of
    // (bol | <null>) followed by the original tree.
    static void fixup_bol (detail::node * &root_,
        node_ptr_vector &node_ptr_vector_)
    {
        typename detail::node::node_vector *first_ = &root_->firstpos ();
        bool found_ = false;
        typename detail::node::node_vector::const_iterator iter_ =
            first_->begin ();
        typename detail::node::node_vector::const_iterator end_ =
            first_->end ();

        for (; iter_ != end_; ++iter_)
        {
            const detail::node *node_ = *iter_;

            found_ = !node_->end_state () && node_->token () == bol_token;

            if (found_) break;
        }

        if (!found_)
        {
            node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
            node_ptr_vector_->back () = new detail::leaf_node
                (bol_token, true);

            detail::node *lhs_ = node_ptr_vector_->back ();

            node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
            node_ptr_vector_->back () = new detail::leaf_node
                (null_token, true);

            detail::node *rhs_ = node_ptr_vector_->back ();

            node_ptr_vector_->push_back
                (static_cast<detail::selection_node *>(0));
            node_ptr_vector_->back () =
                new detail::selection_node (lhs_, rhs_);
            lhs_ = node_ptr_vector_->back ();

            node_ptr_vector_->push_back
                (static_cast<detail::sequence_node *>(0));
            node_ptr_vector_->back () =
                new detail::sequence_node (lhs_, root_);
            root_ = node_ptr_vector_->back ();
        }
    }

    // One minimisation pass: finds rows of dfa_ that are byte-for-byte
    // identical, maps them all to a single new state index via lookup_,
    // then rebuilds the DFA without the duplicate rows, remapping every
    // transition target through lookup_.  Callers loop until the size
    // stops changing.
    static void minimise_dfa (const std::size_t dfa_alphabet_,
        size_t_vector &dfa_, std::size_t size_)
    {
        const std::size_t *first_ = &dfa_.front ();
        const std::size_t *second_ = 0;
        const std::size_t *end_ = first_ + size_;
        std::size_t index_ = 1;
        std::size_t new_index_ = 1;
        std::size_t curr_index_ = 0;
        index_set index_set_;
        size_t_vector lookup_;
        std::size_t *lookup_ptr_ = 0;

        // lookup_ maps old state index -> new state index; null_token
        // marks entries not yet assigned.
        lookup_.resize (size_ / dfa_alphabet_, null_token);
        lookup_ptr_ = &lookup_.front ();
        *lookup_ptr_ = 0;
        // Only one 'jam' state, so skip it.
        first_ += dfa_alphabet_;

        for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
        {
            for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
                second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
            {
                // Already merged into an earlier state.
                if (index_set_.find (curr_index_) != index_set_.end ())
                {
                    continue;
                }

                // Some systems have memcmp in namespace std.
                using namespace std;

                if (memcmp (first_, second_, sizeof (std::size_t) *
                    dfa_alphabet_) == 0)
                {
                    index_set_.insert (curr_index_);
                    lookup_ptr_[curr_index_] = new_index_;
                }
            }

            // Unassigned means this row survives; give it the next new
            // index.  (A row merged earlier already has its index and is
            // skipped — anything equal to it was found in that pass.)
            if (lookup_ptr_[index_] == null_token)
            {
                lookup_ptr_[index_] = new_index_;
                ++new_index_;
            }
        }

        if (!index_set_.empty ())
        {
            // Rebuild, starting from a copy of the jam-state row.
            const std::size_t *front_ = &dfa_.front ();
            size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
            typename index_set::iterator set_end_ =
                index_set_.end ();
            const std::size_t *ptr_ = front_ + dfa_alphabet_;
            std::size_t *new_ptr_ = 0;

            new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
            new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
            size_ /= dfa_alphabet_;

            for (index_ = 1; index_ < size_; ++index_)
            {
                // Skip merged-away rows.
                if (index_set_.find (index_) != set_end_)
                {
                    ptr_ += dfa_alphabet_;
                    continue;
                }

                // Copy the bookkeeping cells; remap BOL/EOL and all
                // character transitions through lookup_ptr_.
                new_ptr_[end_state_index] = ptr_[end_state_index];
                new_ptr_[id_index] = ptr_[id_index];
                new_ptr_[unique_id_index] = ptr_[unique_id_index];
                new_ptr_[state_index] = ptr_[state_index];
                new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
                new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
                new_ptr_ += dfa_offset;
                ptr_ += dfa_offset;

                for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
                {
                    *new_ptr_++ = lookup_ptr_[*ptr_++];
                }
            }

            dfa_.swap (new_dfa_);
        }
    }
};
| |
// Convenience typedefs for the common narrow- and wide-character generators.
typedef basic_generator<char> generator;
typedef basic_generator<wchar_t> wgenerator;
| } |
| } |
| |
| #endif |