blob: 6d2fcad978bf9b92441124342decc4040de7611e [file] [log] [blame]
///////////////////////////////////////////////////////////////////////////////
// toy_spirit.hpp
//
// Copyright 2008 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <cctype>
#include <string>
#include <cstring>
#include <iostream>
#include <boost/assert.hpp>
#include <boost/mpl/assert.hpp>
#include <boost/proto/core.hpp>
#include <boost/proto/context.hpp>
#include <boost/test/unit_test.hpp>
// Tag types and the primitive parser terminals built from them. Everything
// in this block is declared directly in namespace boost.
namespace boost
{
// global tags
// Each tag is an empty type; wrapping it in proto::terminal<> gives a distinct
// terminal type that the parsing context can dispatch on.
struct char_tag {};
struct ichar_tag {};
struct istring_tag {};
struct ichar_range_tag {};
struct never_tag {};
// NOTE(review): always_tag is not referenced in the visible part of this file.
struct always_tag {};
struct space_tag {};
// global primitives
// Aggregate-initialized terminal objects. char_ is used both bare and with
// call syntax (char_('a'), char_('a','z')) in the tests in this file; space
// is used there as a skipper building block.
proto::terminal<char_tag>::type const char_ = {{}};
proto::terminal<space_tag>::type const space = {{}};
// Make proto's lit()/literal<> nameable as boost::lit / boost::literal.
using proto::lit;
using proto::literal;
}
namespace boost { namespace spirit2
{
// handy typedefs
// One terminal type per tag. These names are used as the "function" child in
// the parser grammars below and as dispatch parameters in spirit_context.
typedef proto::terminal<char_tag>::type anychar_p;
typedef proto::terminal<ichar_tag>::type ianychar_p;
typedef proto::terminal<istring_tag>::type ianystr_p;
typedef proto::terminal<ichar_range_tag>::type ianychar_range_p;
typedef proto::terminal<never_tag>::type never_p;
typedef proto::terminal<space_tag>::type space_p;
// Forward declarations: the grammars refer to one another, so they must be
// nameable before their definitions further down.
struct SpiritGrammar;
struct SkipperGrammar;
struct SpiritPrimitives;
template<typename Grammar>
struct SpiritComposites;
// Grammar: a terminal holding a single char, e.g. the 'b' in (char_ >> 'b').
struct CharLiteral
: proto::terminal<char>
{};
// Grammar: a terminal holding a null-terminated byte string (char const *).
struct NTBSLiteral
: proto::terminal<char const *>
{};
// Grammar: a terminal holding a std::string.
struct StdStringLiteral
: proto::terminal<std::string>
{};
// Grammar: char_ invoked with one char argument, i.e. char_('a').
struct CharParser
: proto::function<anychar_p, CharLiteral>
{};
// Grammar: ichar_ invoked with two chars. as_ichar_parser/as_ichar_literal
// build these with the lower-case and upper-case forms of one character.
struct ICharParser
: proto::function<ianychar_p, CharLiteral, CharLiteral>
{};
// Grammar: char_ invoked with two char arguments, i.e. char_('a','z').
struct CharRangeParser
: proto::function<anychar_p, CharLiteral, CharLiteral>
{};
// Grammar: istr_ invoked with a std::string. The as_i*_literal transforms
// fill that string via utility::to_istr (lower/upper pairs per character).
struct IStrParser
: proto::function<ianystr_p, StdStringLiteral>
{};
// Grammar: ichar_range_ invoked with the two bounds of a character range.
struct ICharRangeParser
: proto::function<ianychar_range_p, CharLiteral, CharLiteral>
{};
// Terminal objects used as the callee (child 0) of the case-insensitive
// parser expressions built by the as_i* transforms below.
ianychar_p const ichar_ = {{}};
ianystr_p const istr_ = {{}};
ianychar_range_p const ichar_range_ = {{}};
namespace utility
{
// Tests whether ch is one of the two accepted spellings (lo or hi) of a
// character; callers pass the lower-case and upper-case forms.
inline bool char_icmp(char ch, char lo, char hi)
{
    if(ch == lo)
        return true;
    return ch == hi;
}
// Matches the null-terminated string sz against the input starting at begin.
// On success, begin is advanced past the matched characters and true is
// returned. On failure (mismatch or input exhausted first), begin is left
// untouched and false is returned.
template<typename FwdIter>
inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end)
{
    FwdIter cursor = begin;
    while(*sz)
    {
        if(cursor == end)
            return false;
        if(*cursor != *sz)
            return false;
        ++cursor;
        ++sz;
    }
    // Only commit the new position once the whole string has matched.
    begin = cursor;
    return true;
}
// Case-insensitive string match. str holds two accepted characters for every
// input position (as produced by to_istr: lower-case then upper-case), so its
// length must be even. On success, begin is advanced past the match; on
// failure, begin is left untouched.
template<typename FwdIter>
inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end)
{
    BOOST_ASSERT(0 == str.size() % 2);
    FwdIter cursor = begin;
    std::string::const_iterator pair = str.begin();
    std::string::const_iterator const last = str.end();
    while(pair != last)
    {
        if(cursor == end)
            return false;
        // Reject only if the input differs from both spellings.
        if(*cursor != *pair)
        {
            if(*cursor != *(pair + 1))
                return false;
        }
        ++cursor;
        pair += 2;
    }
    begin = cursor;
    return true;
}
// Returns true when ch lies in the closed interval [lo, hi].
inline bool in_range(char ch, char lo, char hi)
{
    return ch >= lo && ch <= hi;
}
// Case-insensitive range test: true when ch itself, its lower-case form or
// its upper-case form lies in [lo, hi].
inline bool in_irange(char ch, char lo, char hi)
{
    // std::tolower/std::toupper require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behavior.
    unsigned char const uch = static_cast<unsigned char>(ch);
    return in_range(ch, lo, hi)
        || in_range(static_cast<char>(std::tolower(uch)), lo, hi)
        || in_range(static_cast<char>(std::toupper(uch)), lo, hi);
}
// Builds the lookup string used by string_icmp: for every character of sz
// the result holds its lower-case form followed by its upper-case form, so
// the result is exactly twice as long as sz.
inline std::string to_istr(char const *sz)
{
    std::string res;
    res.reserve(std::strlen(sz) * 2);
    for(; *sz; ++sz)
    {
        // std::tolower/std::toupper require a value representable as
        // unsigned char; a negative plain char would be undefined behavior.
        unsigned char const uch = static_cast<unsigned char>(*sz);
        res.push_back(static_cast<char>(std::tolower(uch)));
        res.push_back(static_cast<char>(std::toupper(uch)));
    }
    return res;
}
} // namespace utility
// Evaluation context that interprets a parser expression against the input
// range held in the std::pair base: `first` is the current position and is
// advanced as input is consumed, `second` is the end of input. Every
// operator() overload is a parse function for one kind of expression node
// and returns true on a match.
//
// Skipper is a parser expression that each primitive runs (repeatedly, until
// it stops matching) before attempting its own match; the default never_p
// never matches, so nothing is skipped.
template<typename FwdIter, typename Skipper = never_p>
struct spirit_context
  : std::pair<FwdIter, FwdIter>
  , proto::callable_context<spirit_context<FwdIter, Skipper> >
{
    typedef bool result_type;
    typedef FwdIter iterator;

    spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper())
      : std::pair<FwdIter, FwdIter>(first, second)
      , skip_(skip)
      , in_skip_(false)
    {}

    // parse function for anychar_p
    bool operator()(proto::tag::terminal, char_tag)
    {
        this->skip();
        if(this->first == this->second)
            return false;
        ++this->first;
        return true;
    }

    // parse function for char_('a')
    template<typename Expr>
    bool operator()(proto::tag::function, anychar_p, Expr const &expr)
    {
        this->skip();
        return proto::eval(expr, *this);
    }

    // parse function for space_p
    bool operator()(proto::tag::terminal, space_tag)
    {
        this->skip();
        // std::isspace requires a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        if(this->first == this->second
          || !std::isspace(static_cast<unsigned char>(*this->first)))
            return false;
        ++this->first;
        return true;
    }

    // parse function for bare character literals
    bool operator()(proto::tag::terminal, char ch)
    {
        this->skip();
        if(this->first == this->second || *this->first != ch)
            return false;
        ++this->first;
        return true;
    }

    // case-insensitive character parser
    template<typename Arg1, typename Arg2>
    bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2)
    {
        this->skip();
        if(this->first == this->second
          || !utility::char_icmp(*this->first, proto::value(arg1), proto::value(arg2)))
            return false;
        ++this->first;
        return true;
    }

    // parse function for NTBS literals
    bool operator()(proto::tag::terminal, char const *sz)
    {
        this->skip();
        return utility::string_cmp(sz, this->first, this->second);
    }

    // parse function for istr_("hello")
    template<typename Expr>
    bool operator()(proto::tag::function, ianystr_p, Expr const &expr)
    {
        this->skip();
        return utility::string_icmp(proto::value(expr), this->first, this->second);
    }

    // parse function for char_('a','z')
    template<typename Arg1, typename Arg2>
    bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2)
    {
        BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
        this->skip();
        if(this->first == this->second
          || !utility::in_range(*this->first, proto::value(arg1), proto::value(arg2)))
            return false;
        ++this->first;
        return true;
    }

    // parse function for ichar_range_('a','z')
    template<typename Arg1, typename Arg2>
    bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2)
    {
        BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
        this->skip();
        if(this->first == this->second
          || !utility::in_irange(*this->first, proto::value(arg1), proto::value(arg2)))
            return false;
        ++this->first;
        return true;
    }

    // parse function for complemented thingies (where thingies are assumed
    // to be 1 character wide).
    template<typename Expr>
    bool operator()(proto::tag::complement, Expr const &expr)
    {
        this->skip();
        iterator where = this->first;
        // A complement consumes exactly one character, so it cannot match at
        // end of input. Bailing out here also keeps ++where below from
        // stepping the iterator past the end.
        if(where == this->second)
            return false;
        if(proto::eval(expr, *this))
            return this->first = where, false;
        this->first = ++where;
        return true;
    }

    // never_p parse function always returns false.
    bool operator()(proto::tag::terminal, never_tag)
    {
        return false;
    }

    // for A >> B, succeeds if A and B matches.
    // Note: on failure the position is not restored here; the alternative,
    // star, optional and difference parsers below restore it themselves.
    template<typename Left, typename Right>
    bool operator()(proto::tag::shift_right, Left const &left, Right const &right)
    {
        return proto::eval(left, *this) && proto::eval(right, *this);
    }

    // for A | B, succeeds if either A or B matches at this point.
    template<typename Left, typename Right>
    bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right)
    {
        iterator where = this->first;
        // B is tried from the original position, not from wherever a
        // partially matched A left off.
        return proto::eval(left, *this) || proto::eval(right, this->reset(where));
    }

    // for *A, greedily match A as many times as possible.
    // NOTE(review): if A can match without consuming input, this loop does
    // not appear to terminate; callers must avoid such operands.
    template<typename Expr>
    bool operator()(proto::tag::dereference, Expr const &expr)
    {
        iterator where = this->first;
        while(proto::eval(expr, *this))
            where = this->first;
        // make sure that when we return true, the iterator is at the correct position!
        this->first = where;
        return true;
    }

    // for +A, greedily match A one or more times.
    template<typename Expr>
    bool operator()(proto::tag::unary_plus, Expr const &expr)
    {
        // One mandatory match followed by *A.
        return proto::eval(expr, *this) && proto::eval(*expr, *this);
    }

    // for !A, optionally match A.
    template<typename Expr>
    bool operator()(proto::tag::logical_not, Expr const &expr)
    {
        iterator where = this->first;
        if(!proto::eval(expr, *this))
            this->first = where;
        return true;
    }

    // for (A - B), matches when A but not B matches.
    template<typename Left, typename Right>
    bool operator()(proto::tag::minus, Left const &left, Right const &right)
    {
        iterator where = this->first;
        // B is tried first; A is then evaluated from the original position.
        return !proto::eval(right, *this) && proto::eval(left, this->reset(where));
    }

private:
    // Moves the current position back to where and returns *this, so that a
    // rewind can be written inline as the context argument of proto::eval.
    spirit_context &reset(iterator where)
    {
        this->first = where;
        return *this;
    }

    // Applies the skip parser until it stops matching. The skip parser is
    // evaluated with this same context, so its primitives call skip() too;
    // in_skip_ turns those nested calls into no-ops.
    void skip()
    {
        if(!this->in_skip_)
        {
            this->in_skip_ = true;
            while(proto::eval(this->skip_, *this))
            {}
            this->in_skip_ = false;
        }
    }

    Skipper skip_;
    bool in_skip_;
};
// Callable transform: turns char_('x') into the case-insensitive parser
// ichar_(lower('x'), upper('x')).
struct as_ichar_parser : proto::callable
{
    typedef proto::function<
        ianychar_p
      , proto::terminal<char>::type
      , proto::terminal<char>::type
    >::type result_type;

    // expr is a function expression whose child 1 is the character argument.
    template<typename Expr>
    result_type operator()(Expr const &expr) const
    {
        // std::tolower/std::toupper require a value representable as
        // unsigned char; a negative plain char would be undefined behavior.
        unsigned char const ch =
            static_cast<unsigned char>(proto::value(proto::child_c<1>(expr)));
        char lo = static_cast<char>(std::tolower(ch));
        char hi = static_cast<char>(std::toupper(ch));
        result_type that = {ichar_, {lo}, {hi}};
        return that;
    }
};
// Callable transform: turns char_('a','z') into ichar_range_('a','z'),
// carrying both bounds over unchanged.
struct as_ichar_range_parser : proto::callable
{
    typedef proto::function<
        ianychar_range_p
      , proto::terminal<char>::type
      , proto::terminal<char>::type
    >::type result_type;

    // expr is a function expression; children 1 and 2 are the range bounds.
    template<typename Expr>
    result_type operator()(Expr const &expr) const
    {
        char const range_lo = proto::value(proto::child_c<1>(expr));
        char const range_hi = proto::value(proto::child_c<2>(expr));
        result_type const parser = {ichar_range_, {range_lo}, {range_hi}};
        return parser;
    }
};
// Callable transform: turns a bare character literal terminal 'x' into the
// case-insensitive parser ichar_(lower('x'), upper('x')).
struct as_ichar_literal : proto::callable
{
    typedef proto::function<
        ianychar_p
      , proto::terminal<char>::type
      , proto::terminal<char>::type
    >::type result_type;

    // expr is a terminal whose value is the character to match.
    template<typename Expr>
    result_type operator()(Expr const &expr) const
    {
        // std::tolower/std::toupper require a value representable as
        // unsigned char; a negative plain char would be undefined behavior.
        unsigned char const ch = static_cast<unsigned char>(proto::value(expr));
        char lo = static_cast<char>(std::tolower(ch));
        char hi = static_cast<char>(std::toupper(ch));
        result_type that = {ichar_, {lo}, {hi}};
        return that;
    }
};
// Callable transform: turns a null-terminated string literal terminal into
// istr_(s), where s is the lower/upper pair string built by utility::to_istr.
struct as_intbs_literal : proto::callable
{
    typedef proto::function<
        ianystr_p
      , proto::terminal<std::string>::type
    >::type result_type;

    // expr is a terminal whose value is the string to match.
    template<typename Expr>
    result_type operator()(Expr const &expr) const
    {
        std::string const pairs = utility::to_istr(proto::value(expr));
        result_type const parser = {istr_, {pairs}};
        return parser;
    }
};
// Callable transform: turns a std::string terminal into istr_(s), where s is
// the lower/upper pair string built by utility::to_istr.
struct as_istdstring_literal : proto::callable
{
    typedef proto::function<
        ianystr_p
      , proto::terminal<std::string>::type
    >::type result_type;

    // expr is a terminal whose value is the std::string to match.
    template<typename Expr>
    result_type operator()(Expr const &expr) const
    {
        char const *const text = proto::value(expr).c_str();
        result_type const parser = {istr_, {utility::to_istr(text)}};
        return parser;
    }
};
///////////////////////////////////////////////////////////////////////////
// Transforms
///////////////////////////////////////////////////////////////////////////
// Primitive transform that rewrites an expression P into (*S >> P), where S
// is the transform's state parameter (the skip parser). The data parameter
// is not used.
struct skip_primitives : proto::transform<skip_primitives>
{
    template<typename Expr, typename State, typename Data>
    struct impl : proto::transform_impl<Expr, State, Data>
    {
        // *S
        typedef typename proto::dereference<State>::type star_skip_type;
        // *S >> P
        typedef typename proto::shift_right<star_skip_type, Expr>::type result_type;

        result_type operator ()(
            typename impl::expr_param expr
          , typename impl::state_param state
          , typename impl::data_param
        ) const
        {
            result_type const wrapped = {{state}, expr};
            return wrapped;
        }
    };
};
///////////////////////////////////////////////////////////////////////////
// Grammar
///////////////////////////////////////////////////////////////////////////
using proto::_;
struct SpiritGrammar;
// Grammar for the case-sensitive primitives. Each alternative carries a
// transform (one of the as_i* callables above) that builds the
// case-insensitive equivalent of the matched expression; the `_` argument
// passes the whole matched expression to the callable.
struct SpiritCaseSensitivePrimitives
: proto::or_<
proto::when<CharParser, as_ichar_parser(_)>
, proto::when<CharLiteral, as_ichar_literal(_)>
, proto::when<NTBSLiteral, as_intbs_literal(_)>
, proto::when<CharRangeParser, as_ichar_range_parser(_)>
, proto::when<StdStringLiteral, as_istdstring_literal(_)>
>
{};
// Grammar for primitives that need no case conversion: the bare any-char
// parser, the already case-insensitive string/char/range parsers, and the
// complement (~P) of any primitive. No explicit transforms are attached to
// these alternatives.
struct SpiritCaseInsensitivePrimitives
: proto::or_<
anychar_p
, IStrParser
, ICharParser
, ICharRangeParser
, proto::complement<SpiritPrimitives>
>
{};
// Grammar for any primitive parser. The case-sensitive alternatives (and
// their no-case transforms) are listed before the case-insensitive ones.
struct SpiritPrimitives
: proto::or_<
SpiritCaseSensitivePrimitives
, SpiritCaseInsensitivePrimitives
>
{};
// Grammar for the composite parsers: A | B, A >> B, A - B, *A, +A and !A.
// The grammar used for the operands is a template parameter so that
// SpiritGrammar and SkipperGrammar can each recurse into themselves.
template<typename Grammar>
struct SpiritComposites
: proto::or_<
proto::bitwise_or< Grammar, Grammar >
, proto::shift_right< Grammar, Grammar >
, proto::minus< Grammar, Grammar >
, proto::dereference< Grammar >
, proto::unary_plus< Grammar >
, proto::logical_not< Grammar >
>
{};
// Regular Spirit grammar, has no-case transforms
// An expression is valid if it is a composite of valid expressions or a
// primitive. parse() statically asserts that rules match this grammar, and
// no_case[...] invokes it as a transform.
struct SpiritGrammar
: proto::or_<
SpiritComposites<SpiritGrammar>
, SpiritPrimitives
>
{};
// Spirit grammar with the skipper transform
// Same shape as SpiritGrammar, but every primitive is rewritten by
// skip_primitives, which prefixes it with the starred skip parser taken from
// the transform's state. skip(...)[...] invokes this grammar as a transform.
struct SkipperGrammar
: proto::or_<
SpiritComposites<SkipperGrammar>
, proto::when<SpiritPrimitives, skip_primitives>
>
{};
///////////////////////////////////////////////////////////////////////////
// Directives
///////////////////////////////////////////////////////////////////////////
// Directive type behind no_case[...]: indexing it with a parser expression
// returns the result of applying SpiritGrammar as a transform to that
// expression.
struct no_case_directive
{
    template<typename Expr>
    typename boost::result_of<SpiritGrammar(Expr const &)>::type const
    operator [](Expr const &expr) const
    {
        SpiritGrammar const to_no_case = SpiritGrammar();
        return to_no_case(expr);
    }
};
// no_case
no_case_directive const no_case = {};
// Directive type behind skip(s)[...]: stores a copy of the skip parser and,
// when indexed with a parser expression, applies SkipperGrammar to it with
// the stored skipper as the transform state.
template<typename Skipper>
struct skip_directive
{
    skip_directive(Skipper const &skip)
      : skip_(skip)
    {}

    template<typename Expr>
    typename boost::result_of<SkipperGrammar(Expr const &, Skipper const &)>::type const
    operator [](Expr const &expr) const
    {
        SkipperGrammar const insert_skipper = SkipperGrammar();
        return insert_skipper(expr, this->skip_);
    }

private:
    Skipper skip_;
};
// skip
// Factory for skip_directive that deduces the skipper type, enabling the
// skip(s)[expr] syntax.
template<typename Skipper>
skip_directive<Skipper> skip(Skipper const &skip)
{
    skip_directive<Skipper> directive(skip);
    return directive;
}
///////////////////////////////////////////////////////////////////////////
// parse
///////////////////////////////////////////////////////////////////////////
template<typename FwdIter, typename Rule>
bool parse(FwdIter begin, FwdIter end, Rule const &rule)
{
// make sure the rule corresponds to the Spirit grammar:
BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
spirit_context<FwdIter> ctx(begin, end);
return proto::eval(rule, ctx);
}
// parse with a skip parser can be implemented in one of two ways:
// Method 1)
// The skip parser is passed to all the parsers which invoke it
// before they invoke themselves. This is how Spirit-1 does it,
// and it is the cause of the Scanner Business. However, it has
// the advantage of not needing a parser transformation phase.
// Method 2)
// Transform the expression template to insert the skip parser
// in between all sequenced parsers. That is, transform (A >> B)
// to (*skip >> A >> *skip >> B). This has the advantage of making
// it unnecessary to pass the scanner to all the parsers, which
// means its type doesn't show up in function signatures, avoiding
// the Scanner Business.
// Recommendation:
// Both methods should be supported. Method 1 should be preferred
// when calling parse with parsers defined inline. Method 2 should
// be preferred when a parser expression is assigned to a rule<>,
// thereby making the type of the rule<> independent of the skip
// parser used. I imagine a syntax like:
// rule<> r = skip(space)[A >> B >> C]
template<typename FwdIter, typename Rule, typename Skipper>
bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper)
{
// make sure the rule corresponds to the Spirit grammar:
BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
//// Method 1: pass skip parser in the context structure.
//spirit_context<FwdIter, Skipper> ctx(begin, end, skipper);
//return proto::eval(rule, ctx);
// Method 2: Embed skip parser via tree transformation.
spirit_context<FwdIter> ctx(begin, end);
return proto::eval(spirit2::skip(skipper)[rule], ctx);
}
}}
using namespace boost;
using namespace spirit2;
// Exercises the toy parser end to end: sequences, alternatives, difference,
// Kleene star, no_case[], complement, and parsing with a skip parser.
void test_toy_spirit()
{
    std::string str("abcd");

    // This will fail:
    BOOST_CHECK(!spirit2::parse(str.begin(), str.end()
      , char_ >> char_('a')));

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , char_ >> char_('b') >> char_ >> 'd'));

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd'));

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , *(char_ - 'd')));

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , no_case[char_('A') >> 'B' >> "CD"]));

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , no_case[*char_('A','Z')]));

    literal<char> a = lit('a');
    literal<char const *> bcd = lit("bcd");

    // This will succeed:
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , +~~a >> no_case[bcd]));

    // Scanner Business: R.I.P. :-)
    // The skipper is (space >> space), which consumes whitespace only two
    // characters at a time. Every gap in the input must therefore have an
    // even length; a single space would be left unskipped and the next
    // literal would fail to match.
    str = "a  b    cd";
    BOOST_CHECK(spirit2::parse(str.begin(), str.end()
      , char_('a') >> 'b' >> 'c' >> 'd', space >> space));
}
using namespace boost::unit_test;
///////////////////////////////////////////////////////////////////////////////
// init_unit_test_suite
//
// Boost.Test entry point: creates the suite and registers test_toy_spirit
// as its only test case. The command-line arguments are not used.
test_suite* init_unit_test_suite( int argc, char* argv[] )
{
    test_suite *suite = BOOST_TEST_SUITE("test proto and and toy spirit-2");
    suite->add(BOOST_TEST_CASE(&test_toy_spirit));
    return suite;
}