// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens, and how to use it together with a grammar. The grammar
// used here backtracks heavily, which makes it a good candidate for
// lexer-based parsing: all tokens are scanned and generated only once, even
// if backtracking is required. This speeds up the overall parsing process
// considerably, outweighing the overhead of setting up the lexer.
// Additionally, the example demonstrates how to use one of the defined tokens
// as a parser component in the grammar.
//
// The grammar recognizes a simple input structure: any number of simple
// English sentences (statements, questions and commands), each category
// being counted separately.
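//
// For example, an input paragraph such as
//
//     Hello world. How are you? Go away!
//
// (terminated by a newline) is counted as one statement, one question and
// one command.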

// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::ascii;
using boost::phoenix::ref;

///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example2_tokens : lex::lexer<Lexer>
{
    example2_tokens()
    {
        // A 'word' consists of one or more letters and an optional
        // apostrophe. If it contains an apostrophe, there may be only one,
        // and it must be both preceded and followed by at least one letter.
        // For example, "I'm" and "doesn't" meet the definition of 'word' used
        // below.
        word = "[a-zA-Z]+('[a-zA-Z]+)?";
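
        // Note: only a single apostrophe is allowed, so something like
        // "rock'n'roll" is not one word: the definition matches only its
        // leading "rock'n", and the apostrophe that follows matches no token
        // at all, causing the parse to fail.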

        // Associate the tokens and the token set with the lexer. Note that
        // single-character token definitions, as used below, are always
        // interpreted literally and never as special regex characters. This
        // is done so that single characters can be assigned the id of their
        // character code value, which allows them to be referenced as
        // literals in Qi grammars.
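        //
        // Whitespace is deliberately not skipped: ' ' and '\n' are emitted as
        // ordinary tokens and are consumed explicitly by the grammar below.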
        this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
    }

    lex::token_def<> word;
};
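
// A possible variation (not used in this example): declaring the token as
//
//     lex::token_def<std::string> word;
//
// and listing std::string among the value types of the lex::lexertl::token
// typedef in main() would expose the matched characters as the token's
// attribute inside the Qi grammar.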

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example2_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : example2_grammar::base_type(story)
      , paragraphs(0), commands(0), questions(0), statements(0)
    {
        story
            = +paragraph
            ;

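        // A paragraph is one or more sentences followed by optional trailing
        // spaces and at least one newline; the attached semantic action
        // increments the paragraph counter once per matched paragraph.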
        paragraph
            = ( +(   command [ ++ref(commands) ]
                   | question [ ++ref(questions) ]
                   | statement [ ++ref(statements) ]
                 )
                >> *char_(' ') >> +char_('\n')
              )
              [ ++ref(paragraphs) ]
            ;

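        // Each sentence is one or more words, spaces or commas, terminated by
        // its distinguishing punctuation mark. Because the three alternatives
        // share the same prefix, the parser backtracks on almost every
        // sentence, which is exactly why lexing the input up front pays off
        // here: the tokens are generated only once.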
        command
            = +(tok.word | ' ' | ',') >> '!'
            ;

        question
            = +(tok.word | ' ' | ',') >> '?'
            ;

        statement
            = +(tok.word | ' ' | ',') >> '.'
            ;

        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    qi::rule<Iterator> story, paragraph, command, question, statement;
    int paragraphs, commands, questions, statements;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // Iterator type used to expose the underlying input stream.
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator.
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example2_tokens<lexer_type> example2_tokens;

    // This is the iterator type exposed by the lexer.
    typedef example2_tokens::iterator_type iterator_type;

    // This is the type of the grammar to parse.
    typedef example2_grammar<iterator_type> example2_grammar;

    // Now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process.
    example2_tokens tokens;                         // Our lexer
    example2_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example2.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
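    // As noted at the top of the file, the tokens are generated only once,
    // even if the grammar needs to backtrack over them.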
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    bool r = qi::parse(iter, end, calc);

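    // The parse counts as a full success only if the grammar matched and the
    // whole token stream was consumed.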
    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "There were "
                  << calc.commands << " commands, "
                  << calc.questions << " questions, and "
                  << calc.statements << " statements.\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}