boost_1_45_0/libs/spirit/example/lex/example5.cpp - nest-learning-thermostat/5.0/boost - Git at Google

 //  Copyright (c) 2001-2010 Hartmut Kaiser
 //
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

 //  This example shows how to create a simple lexer recognizing a couple of
 //  different tokens aimed at a simple language and how to use this lexer with
 //  a grammar. It shows how to associate attributes to tokens and how to access the
 //  token attributes from inside the grammar.
 //
 //  Additionally, this example demonstrates, how to define a token set usable
 //  as the skip parser during parsing, allowing to define several tokens to be
 //  ignored.
 //
 //  The main purpose of this example is to show how inheritance can be used to
 //  overload parts of a base grammar and add token definitions to a base lexer.
 //
 //  Further, it shows how you can use the 'omit' attribute type specifier
 //  for token definitions to force the token to have no attribute (expose an
 //  unused attribute).
 //
 //  This example recognizes a very simple programming language having
 //  assignment statements and if and while control structures. Look at the file
 //  example5.input for an example.

 #include <boost/config/warning_disable.hpp>
 #include <boost/spirit/include/qi.hpp>
 #include <boost/spirit/include/lex_lexertl.hpp>
 #include <boost/spirit/include/phoenix_operator.hpp>

 #include <iostream>
 #include <fstream>
 #include <string>

 #include "example.hpp"

 using namespace boost::spirit;
 using boost::phoenix::val;

 ///////////////////////////////////////////////////////////////////////////////
 //  Token definition base, defines all tokens for the base grammar below
 ///////////////////////////////////////////////////////////////////////////////
 template <typename Lexer>
 struct example5_base_tokens : lex::lexer<Lexer>
 {
 protected:
     // this lexer is supposed to be used as a base type only
     example5_base_tokens() {}

 public:
     void init_token_definitions()
     {
         // define the tokens to match
         identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
         constant = "[0-9]+";
         if_ = "if";
         while_ = "while";

         // associate the tokens and the token set with the lexer
         this->self += lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
         this->self += if_ | while_ | identifier;

         // define the whitespace to ignore (spaces, tabs, newlines and C-style
         // comments)
         this->self("WS")
             =   lex::token_def<>("[ \\t\\n]+")
             |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
             ;
     }

     // these tokens have no attribute
     lex::token_def<lex::omit> if_, while_;

     // The following two tokens have an associated attribute type, 'identifier'
     // carries a string (the identifier name) and 'constant' carries the
     // matched integer value.
     //
     // Note: any token attribute type explicitly specified in a token_def<>
     //       declaration needs to be listed during token type definition as
     //       well (see the typedef for the token_type below).
     //
     // The conversion of the matched input to an instance of this type occurs
     // once (on first access), which makes token attributes as efficient as
     // possible. Moreover, token instances are constructed once by the lexer
     // library. From this point on tokens are passed by reference only,
     // avoiding them being copied around.
     lex::token_def<std::string> identifier;
     lex::token_def<unsigned int> constant;
 };

 ///////////////////////////////////////////////////////////////////////////////
 //  Grammar definition base, defines a basic language
 ///////////////////////////////////////////////////////////////////////////////
 template <typename Iterator, typename Lexer>
 struct example5_base_grammar
   : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
 {
     template <typename TokenDef>
     example5_base_grammar(TokenDef const& tok)
       : example5_base_grammar::base_type(program)
     {
         using boost::spirit::_val;

         program
             =  +block
             ;

         block
             =   '{' >> *statement >> '}'
             ;

         statement
             =   assignment
             |   if_stmt
             |   while_stmt
             ;

         assignment
             =   (tok.identifier >> '=' >> expression >> ';')
                 [
                     std::cout << val("assignment statement to: ") << _1 << "\n"
                 ]
             ;

         if_stmt
             =   (tok.if_ >> '(' >> expression >> ')' >> block)
                 [
                     std::cout << val("if expression: ") << _1 << "\n"
                 ]
             ;

         while_stmt
             =   (tok.while_ >> '(' >> expression >> ')' >> block)
                 [
                     std::cout << val("while expression: ") << _1 << "\n"
                 ]
             ;

         //  since expression has a variant return type accommodating for
         //  std::string and unsigned integer, both possible values may be
         //  returned to the calling rule
         expression
             =   tok.identifier [ _val = _1 ]
             |   tok.constant   [ _val = _1 ]
             ;
     }

     typedef qi::in_state_skipper<Lexer> skipper_type;

     qi::rule<Iterator, skipper_type> program, block, statement;
     qi::rule<Iterator, skipper_type> assignment, if_stmt;
     qi::rule<Iterator, skipper_type> while_stmt;

     //  the expression is the only rule having a return value
     typedef boost::variant<unsigned int, std::string> expression_type;
     qi::rule<Iterator, expression_type(), skipper_type>  expression;
 };

 ///////////////////////////////////////////////////////////////////////////////
 //  Token definition for derived lexer, defines additional tokens
 ///////////////////////////////////////////////////////////////////////////////
 template <typename Lexer>
 struct example5_tokens : example5_base_tokens<Lexer>
 {
     typedef example5_base_tokens<Lexer> base_type;

     example5_tokens()
     {
         // define the additional token to match
         else_ = "else";

         // associate the new token with the lexer, note we add 'else' before
         // anything else to add it to the token set before the identifier
         // token, otherwise "else" would be matched as an identifier
         this->self = else_;

         // now add the token definitions from the base class
         this->base_type::init_token_definitions();
     }

     // this token has no attribute
     lex::token_def<lex::omit> else_;
 };

 ///////////////////////////////////////////////////////////////////////////////
 //  Derived grammar definition, defines a language extension
 ///////////////////////////////////////////////////////////////////////////////
 template <typename Iterator, typename Lexer>
 struct example5_grammar : example5_base_grammar<Iterator, Lexer>
 {
     template <typename TokenDef>
     example5_grammar(TokenDef const& tok)
       : example5_base_grammar<Iterator, Lexer>(tok)
     {
         // we alter the if_stmt only
         this->if_stmt
             =   this->if_stmt.copy() >> -(tok.else_ >> this->block)
             ;
     }
 };

 ///////////////////////////////////////////////////////////////////////////////
 int main()
 {
     // iterator type used to expose the underlying input stream
     typedef std::string::iterator base_iterator_type;

     // This is the lexer token type to use. The second template parameter lists
     // all attribute types used for token_def's during token definition (see
     // example5_base_tokens<> above). Here we use the predefined lexertl token
     // type, but any compatible token type may be used instead.
     //
     // If you don't list any token attribute types in the following declaration
     // (or just use the default token type: lexertl_token<base_iterator_type>)
     // it will compile and work just fine, just a bit less efficient. This is
     // because the token attribute will be generated from the matched input
     // sequence every time it is requested. But as soon as you specify at
     // least one token attribute type you'll have to list all attribute types
     // used for token_def<> declarations in the token definition class above,
     // otherwise compilation errors will occur.
     typedef lex::lexertl::token<
         base_iterator_type, boost::mpl::vector<unsigned int, std::string>
     > token_type;

     // Here we use the lexertl based lexer engine.
     typedef lex::lexertl::lexer<token_type> lexer_type;

     // This is the token definition type (derived from the given lexer type).
     typedef example5_tokens<lexer_type> example5_tokens;

     // this is the iterator type exposed by the lexer
     typedef example5_tokens::iterator_type iterator_type;

     // this is the type of the grammar to parse
     typedef example5_grammar<iterator_type, example5_tokens::lexer_def> example5_grammar;

     // now we use the types defined above to create the lexer and grammar
     // object instances needed to invoke the parsing process
     example5_tokens tokens;                         // Our lexer
     example5_grammar calc(tokens);                  // Our parser

     std::string str (read_from_file("example5.input"));

     // At this point we generate the iterator pair used to expose the
     // tokenized input stream.
     std::string::iterator it = str.begin();
     iterator_type iter = tokens.begin(it, str.end());
     iterator_type end = tokens.end();

     // Parsing is done based on the the token stream, not the character
     // stream read from the input.
     // Note how we use the lexer defined above as the skip parser. It must
     // be explicitly wrapped inside a state directive, switching the lexer
     // state for the duration of skipping whitespace.
     std::string ws("WS");
     bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]);

     if (r && iter == end)
     {
         std::cout << "-------------------------\n";
         std::cout << "Parsing succeeded\n";
         std::cout << "-------------------------\n";
     }
     else
     {
         std::cout << "-------------------------\n";
         std::cout << "Parsing failed\n";
         std::cout << "-------------------------\n";
     }

     std::cout << "Bye... :-) \n\n";
     return 0;
 }
	// Copyright (c) 2001-2010 Hartmut Kaiser
	//
	// Distributed under the Boost Software License, Version 1.0. (See accompanying
	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

	// This example shows how to create a simple lexer recognizing a couple of
	// different tokens aimed at a simple language and how to use this lexer with
	// a grammar. It shows how to associate attributes to tokens and how to access the
	// token attributes from inside the grammar.
	//
	// Additionally, this example demonstrates, how to define a token set usable
	// as the skip parser during parsing, allowing to define several tokens to be
	// ignored.
	//
	// The main purpose of this example is to show how inheritance can be used to
	// overload parts of a base grammar and add token definitions to a base lexer.
	//
	// Further, it shows how you can use the 'omit' attribute type specifier
	// for token definitions to force the token to have no attribute (expose an
	// unused attribute).
	//
	// This example recognizes a very simple programming language having
	// assignment statements and if and while control structures. Look at the file
	// example5.input for an example.

	#include <boost/config/warning_disable.hpp>
	#include <boost/spirit/include/qi.hpp>
	#include <boost/spirit/include/lex_lexertl.hpp>
	#include <boost/spirit/include/phoenix_operator.hpp>

	#include <iostream>
	#include <fstream>
	#include <string>

	#include "example.hpp"

	using namespace boost::spirit;
	using boost::phoenix::val;

	///////////////////////////////////////////////////////////////////////////////
	// Token definition base, defines all tokens for the base grammar below
	///////////////////////////////////////////////////////////////////////////////
	template <typename Lexer>
	struct example5_base_tokens : lex::lexer<Lexer>
	{
	protected:
	// this lexer is supposed to be used as a base type only
	example5_base_tokens() {}

	public:
	void init_token_definitions()
	{
	// define the tokens to match
	identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
	constant = "[0-9]+";
	if_ = "if";
	while_ = "while";

	// associate the tokens and the token set with the lexer
	this->self += lex::token_def<>('(') \| ')' \| '{' \| '}' \| '=' \| ';' \| constant;
	this->self += if_ \| while_ \| identifier;

	// define the whitespace to ignore (spaces, tabs, newlines and C-style
	// comments)
	this->self("WS")
	= lex::token_def<>("[ \\t\\n]+")
	\| "\\/\\[^]\\+([^/][^]\\+)*\\/"
	;
	}

	// these tokens have no attribute
	lex::token_def<lex::omit> if_, while_;

	// The following two tokens have an associated attribute type, 'identifier'
	// carries a string (the identifier name) and 'constant' carries the
	// matched integer value.
	//
	// Note: any token attribute type explicitly specified in a token_def<>
	// declaration needs to be listed during token type definition as
	// well (see the typedef for the token_type below).
	//
	// The conversion of the matched input to an instance of this type occurs
	// once (on first access), which makes token attributes as efficient as
	// possible. Moreover, token instances are constructed once by the lexer
	// library. From this point on tokens are passed by reference only,
	// avoiding them being copied around.
	lex::token_def<std::string> identifier;
	lex::token_def<unsigned int> constant;
	};

	///////////////////////////////////////////////////////////////////////////////
	// Grammar definition base, defines a basic language
	///////////////////////////////////////////////////////////////////////////////
	template <typename Iterator, typename Lexer>
	struct example5_base_grammar
	: qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
	{
	template <typename TokenDef>
	example5_base_grammar(TokenDef const& tok)
	: example5_base_grammar::base_type(program)
	{
	using boost::spirit::_val;

	program
	= +block
	;

	block
	= '{' >> *statement >> '}'
	;

	statement
	= assignment
	\| if_stmt
	\| while_stmt
	;

	assignment
	= (tok.identifier >> '=' >> expression >> ';')
	[
	std::cout << val("assignment statement to: ") << _1 << "\n"
	]
	;

	if_stmt
	= (tok.if_ >> '(' >> expression >> ')' >> block)
	[
	std::cout << val("if expression: ") << _1 << "\n"
	]
	;

	while_stmt
	= (tok.while_ >> '(' >> expression >> ')' >> block)
	[
	std::cout << val("while expression: ") << _1 << "\n"
	]
	;

	// since expression has a variant return type accommodating for
	// std::string and unsigned integer, both possible values may be
	// returned to the calling rule
	expression
	= tok.identifier [ _val = _1 ]
	\| tok.constant [ _val = _1 ]
	;
	}

	typedef qi::in_state_skipper<Lexer> skipper_type;

	qi::rule<Iterator, skipper_type> program, block, statement;
	qi::rule<Iterator, skipper_type> assignment, if_stmt;
	qi::rule<Iterator, skipper_type> while_stmt;

	// the expression is the only rule having a return value
	typedef boost::variant<unsigned int, std::string> expression_type;
	qi::rule<Iterator, expression_type(), skipper_type> expression;
	};

	///////////////////////////////////////////////////////////////////////////////
	// Token definition for derived lexer, defines additional tokens
	///////////////////////////////////////////////////////////////////////////////
	template <typename Lexer>
	struct example5_tokens : example5_base_tokens<Lexer>
	{
	typedef example5_base_tokens<Lexer> base_type;

	example5_tokens()
	{
	// define the additional token to match
	else_ = "else";

	// associate the new token with the lexer, note we add 'else' before
	// anything else to add it to the token set before the identifier
	// token, otherwise "else" would be matched as an identifier
	this->self = else_;

	// now add the token definitions from the base class
	this->base_type::init_token_definitions();
	}

	// this token has no attribute
	lex::token_def<lex::omit> else_;
	};

	///////////////////////////////////////////////////////////////////////////////
	// Derived grammar definition, defines a language extension
	///////////////////////////////////////////////////////////////////////////////
	template <typename Iterator, typename Lexer>
	struct example5_grammar : example5_base_grammar<Iterator, Lexer>
	{
	template <typename TokenDef>
	example5_grammar(TokenDef const& tok)
	: example5_base_grammar<Iterator, Lexer>(tok)
	{
	// we alter the if_stmt only
	this->if_stmt
	= this->if_stmt.copy() >> -(tok.else_ >> this->block)
	;
	}
	};

	///////////////////////////////////////////////////////////////////////////////
	int main()
	{
	// iterator type used to expose the underlying input stream
	typedef std::string::iterator base_iterator_type;

	// This is the lexer token type to use. The second template parameter lists
	// all attribute types used for token_def's during token definition (see
	// example5_base_tokens<> above). Here we use the predefined lexertl token
	// type, but any compatible token type may be used instead.
	//
	// If you don't list any token attribute types in the following declaration
	// (or just use the default token type: lexertl_token<base_iterator_type>)
	// it will compile and work just fine, just a bit less efficient. This is
	// because the token attribute will be generated from the matched input
	// sequence every time it is requested. But as soon as you specify at
	// least one token attribute type you'll have to list all attribute types
	// used for token_def<> declarations in the token definition class above,
	// otherwise compilation errors will occur.
	typedef lex::lexertl::token<
	base_iterator_type, boost::mpl::vector<unsigned int, std::string>
	> token_type;

	// Here we use the lexertl based lexer engine.
	typedef lex::lexertl::lexer<token_type> lexer_type;

	// This is the token definition type (derived from the given lexer type).
	typedef example5_tokens<lexer_type> example5_tokens;

	// this is the iterator type exposed by the lexer
	typedef example5_tokens::iterator_type iterator_type;

	// this is the type of the grammar to parse
	typedef example5_grammar<iterator_type, example5_tokens::lexer_def> example5_grammar;

	// now we use the types defined above to create the lexer and grammar
	// object instances needed to invoke the parsing process
	example5_tokens tokens; // Our lexer
	example5_grammar calc(tokens); // Our parser

	std::string str (read_from_file("example5.input"));

	// At this point we generate the iterator pair used to expose the
	// tokenized input stream.
	std::string::iterator it = str.begin();
	iterator_type iter = tokens.begin(it, str.end());
	iterator_type end = tokens.end();

	// Parsing is done based on the the token stream, not the character
	// stream read from the input.
	// Note how we use the lexer defined above as the skip parser. It must
	// be explicitly wrapped inside a state directive, switching the lexer
	// state for the duration of skipping whitespace.
	std::string ws("WS");
	bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]);

	if (r && iter == end)
	{
	std::cout << "-------------------------\n";
	std::cout << "Parsing succeeded\n";
	std::cout << "-------------------------\n";
	}
	else
	{
	std::cout << "-------------------------\n";
	std::cout << "Parsing failed\n";
	std::cout << "-------------------------\n";
	}

	std::cout << "Bye... :-) \n\n";
	return 0;
	}