blob: a514f3774793f14880983328c5a1f0cebfaf62dd [file] [log] [blame]
/*=============================================================================
Copyright (c) 2001-2003 Joel de Guzman
Copyright (c) 2011 Daniel James
http://spirit.sourceforge.net/
Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#ifndef QUICKBOOK_SYMBOLS_IPP
#define QUICKBOOK_SYMBOLS_IPP
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/home/classic/symbols.hpp>
#include <boost/intrusive_ptr.hpp>
#include <boost/scoped_ptr.hpp>
///////////////////////////////////////////////////////////////////////////////
namespace quickbook
{
///////////////////////////////////////////////////////////////////////////////
//
// tst class
//
// This it the Ternary Search Tree from
// <boost/spirit/home/classic/symbols/impl/tst.ipp> adapted to be cheap
// to copy.
//
// Ternary Search Tree implementation. The data structure is faster than
// hashing for many typical search problems especially when the search
// interface is iterator based. Searching for a string of length k in a
// ternary search tree with n strings will require at most O(log n+k)
// character comparisons. TSTs are many times faster than hash tables
// for unsuccessful searches since mismatches are discovered earlier
// after examining only a few characters. Hash tables always examine an
// entire key when searching.
//
// For details see http://www.cs.princeton.edu/~rs/strings/.
//
// *** This is a low level class and is
// not meant for public consumption ***
//
///////////////////////////////////////////////////////////////////////////////
template <typename T, typename CharT>
struct tst_node
{
tst_node(CharT value_)
: reference_count(0)
, left()
, middle()
, right()
, data()
, value(value_)
{
}
tst_node(tst_node const& other)
: reference_count(0)
, left(other.left)
, middle(other.middle)
, right(other.right)
, data(other.data ? new T(*other.data) : 0)
, value(other.value)
{
}
// If you fancy a slight improvement in memory use,
// reference_count + value could probably be packed
// in the space for a single int.
int reference_count;
boost::intrusive_ptr<tst_node> left;
boost::intrusive_ptr<tst_node> middle;
boost::intrusive_ptr<tst_node> right;
boost::scoped_ptr<T> data;
CharT value;
private:
tst_node& operator=(tst_node const&);
};
template <typename T, typename CharT>
void intrusive_ptr_add_ref(tst_node<T, CharT>* ptr)
{ ++ptr->reference_count; }
template <typename T, typename CharT>
void intrusive_ptr_release(tst_node<T, CharT>* ptr)
{ if(--ptr->reference_count == 0) delete ptr; }
///////////////////////////////////////////////////////////////////////////
template <typename T, typename CharT>
class tst
{
typedef tst_node<T, CharT> node_t;
typedef boost::intrusive_ptr<node_t> node_ptr;
node_ptr root;
public:
struct search_info
{
T* data;
std::size_t length;
};
void swap(tst& other)
{
root.swap(other.root);
}
// Adds symbol to ternary search tree.
// If it already exists, then replace it with new value.
//
// pre: first != last
template <typename IteratorT>
T* add(IteratorT first, IteratorT const& last, T const& data)
{
assert (first != last);
node_ptr* np = &root;
CharT ch = *first;
for(;;)
{
if (!*np)
{
*np = new node_t(ch);
}
else if ((*np)->reference_count > 1)
{
*np = new node_t(**np);
}
if (ch < (*np)->value)
{
np = &(*np)->left;
}
else if (ch == (*np)->value)
{
++first;
if (first == last) break;
ch = *first;
np = &(*np)->middle;
}
else
{
np = &(*np)->right;
}
}
boost::scoped_ptr<T> new_data(new T(data));
boost::swap((*np)->data, new_data);
return (*np)->data.get();
}
template <typename ScannerT>
search_info find(ScannerT const& scan) const
{
search_info result = { 0, 0 };
if (scan.at_end()) {
return result;
}
typedef typename ScannerT::iterator_t iterator_t;
node_ptr np = root;
CharT ch = *scan;
iterator_t latest = scan.first;
std::size_t length = 0;
while (np)
{
if (ch < np->value) // => go left!
{
np = np->left;
}
else if (ch == np->value) // => go middle!
{
++scan;
++length;
// Found a potential match.
if (np->data.get())
{
result.data = np->data.get();
result.length = length;
latest = scan.first;
}
if (scan.at_end()) break;
ch = *scan;
np = np->middle;
}
else // (ch > np->value) => go right!
{
np = np->right;
}
}
scan.first = latest;
return result;
}
};
typedef boost::spirit::classic::symbols<
std::string,
char,
quickbook::tst<std::string, char>
> string_symbols;
} // namespace quickbook
#endif