| /*============================================================================= |
| Copyright (c) 2001-2003 Joel de Guzman |
| http://spirit.sourceforge.net/ |
| |
| Use, modification and distribution is subject to the Boost Software |
| License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
| http://www.boost.org/LICENSE_1_0.txt) |
| =============================================================================*/ |
| #ifndef BOOST_SPIRIT_TST_IPP |
| #define BOOST_SPIRIT_TST_IPP |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| #include <memory> // for std::auto_ptr |
| #include <boost/spirit/home/classic/core/assert.hpp> |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| namespace boost { namespace spirit { |
| |
| BOOST_SPIRIT_CLASSIC_NAMESPACE_BEGIN |
| |
| namespace impl |
| { |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| // tst class |
| // |
| // Ternary Search Tree implementation. The data structure is faster than |
| // hashing for many typical search problems especially when the search |
| // interface is iterator based. Searching for a string of length k in a |
| // ternary search tree with n strings will require at most O(log n+k) |
| // character comparisons. TSTs are many times faster than hash tables |
| // for unsuccessful searches since mismatches are discovered earlier |
| // after examining only a few characters. Hash tables always examine an |
| // entire key when searching. |
| // |
| // For details see http://www.cs.princeton.edu/~rs/strings/. |
| // |
| // *** This is a low level class and is |
| // not meant for public consumption *** |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| template <typename T, typename CharT> |
| struct tst_node |
| { |
| tst_node(CharT value_) |
| : value(value_) |
| , left(0) |
| , right(0) |
| { middle.link = 0; } |
| |
| ~tst_node() |
| { |
| delete left; |
| delete right; |
| if (value) |
| delete middle.link; |
| else |
| delete middle.data; |
| } |
| |
| tst_node* |
| clone() const |
| { |
| std::auto_ptr<tst_node> copy(new tst_node(value)); |
| |
| if (left) |
| copy->left = left->clone(); |
| if (right) |
| copy->right = right->clone(); |
| |
| if (value && middle.link) |
| { |
| copy->middle.link = middle.link->clone(); |
| } |
| else |
| { |
| std::auto_ptr<T> mid_data(new T(*middle.data)); |
| copy->middle.data = mid_data.release(); |
| } |
| |
| return copy.release(); |
| } |
| |
| union center { |
| |
| tst_node* link; |
| T* data; |
| }; |
| |
| CharT value; |
| tst_node* left; |
| center middle; |
| tst_node* right; |
| }; |
| |
| /////////////////////////////////////////////////////////////////////////// |
| template <typename T, typename CharT> |
| class tst |
| { |
| public: |
| |
| struct search_info |
| { |
| T* data; |
| std::size_t length; |
| }; |
| |
| tst() |
| : root(0) {} |
| |
| tst(tst const& other) |
| : root(other.root ? other.root->clone() : 0) {} |
| |
| ~tst() |
| { delete root; } |
| |
| tst& |
| operator=(tst const& other) |
| { |
| if (this != &other) |
| { |
| node_t* new_root = other.root ? other.root->clone() : 0; |
| delete root; |
| root = new_root; |
| } |
| return *this; |
| } |
| |
| template <typename IteratorT> |
| T* add(IteratorT first, IteratorT const& last, T const& data) |
| { |
| if (first == last) |
| return 0; |
| |
| node_t** np = &root; |
| CharT ch = *first; |
| |
| BOOST_SPIRIT_ASSERT((first == last || ch != 0) |
| && "Won't add string containing null character"); |
| |
| for (;;) |
| { |
| if (*np == 0 || ch == 0) |
| { |
| node_t* right = 0; |
| if (np != 0) |
| right = *np; |
| *np = new node_t(ch); |
| if (right) |
| (**np).right = right; |
| } |
| |
| if (ch < (**np).value) |
| { |
| np = &(**np).left; |
| } |
| else |
| { |
| if (ch == (**np).value) |
| { |
| if (ch == 0) |
| { |
| if ((**np).middle.data == 0) |
| { |
| (**np).middle.data = new T(data); |
| return (**np).middle.data; |
| } |
| else |
| { |
| // re-addition is disallowed |
| return 0; |
| } |
| } |
| ++first; |
| ch = (first == last) ? CharT(0) : *first; |
| BOOST_SPIRIT_ASSERT((first == last || ch != 0) |
| && "Won't add string containing null character"); |
| np = &(**np).middle.link; |
| } |
| else |
| { |
| np = &(**np).right; |
| } |
| } |
| } |
| } |
| |
| template <typename ScannerT> |
| search_info find(ScannerT const& scan) const |
| { |
| search_info result = { 0, 0 }; |
| if (scan.at_end()) { |
| return result; |
| } |
| |
| typedef typename ScannerT::iterator_t iterator_t; |
| node_t* np = root; |
| CharT ch = *scan; |
| iterator_t save = scan.first; |
| iterator_t latest = scan.first; |
| std::size_t latest_len = 0; |
| |
| while (np) |
| { |
| |
| if (ch < np->value) // => go left! |
| { |
| if (np->value == 0) |
| { |
| result.data = np->middle.data; |
| if (result.data) |
| { |
| latest = scan.first; |
| latest_len = result.length; |
| } |
| } |
| |
| np = np->left; |
| } |
| else if (ch == np->value) // => go middle! |
| { |
| // Matching the null character is not allowed. |
| if (np->value == 0) |
| { |
| result.data = np->middle.data; |
| if (result.data) |
| { |
| latest = scan.first; |
| latest_len = result.length; |
| } |
| break; |
| } |
| |
| ++scan; |
| ch = scan.at_end() ? CharT(0) : *scan; |
| np = np->middle.link; |
| ++result.length; |
| } |
| else // (ch > np->value) => go right! |
| { |
| if (np->value == 0) |
| { |
| result.data = np->middle.data; |
| if (result.data) |
| { |
| latest = scan.first; |
| latest_len = result.length; |
| } |
| } |
| |
| np = np->right; |
| } |
| } |
| |
| if (result.data == 0) |
| { |
| scan.first = save; |
| } |
| else |
| { |
| scan.first = latest; |
| result.length = latest_len; |
| } |
| return result; |
| } |
| |
| private: |
| |
| typedef tst_node<T, CharT> node_t; |
| node_t* root; |
| }; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| } // namespace impl |
| |
| BOOST_SPIRIT_CLASSIC_NAMESPACE_END |
| |
| }} // namespace boost::spirit |
| |
| #endif |