blob: 3694005a54bbaac59398a62883efa1b3bc0c6055 [file] [log] [blame]
// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
/*
//[wcp_flex_version
%{
int c = 0, w = 0, l = 0;
%}
word [^ \t\n]+
eol \n
%%
{word} { ++w; c += yyleng; }
{eol} { ++c; ++l; }
. { ++c; }
%%
main()
{
yylex();
printf("%d %d %d\n", l, w, c);
}
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// The example additionally demonstrates how to use the add_pattern(...)(...)
// syntax to define lexer patterns. These patterns are essentially parameter-
// less 'macros' for regular expressions, making it possible to simplify their
// definition.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]
#include <iostream>
#include <string>
#include "example.hpp"
//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::ascii;
//]
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
// Token ids for tokens that need an explicit id (here: the catch-all '.'
// token). Ids start above lex::min_token_id so they cannot collide with
// the ids the lexer library assigns automatically to other tokens.
enum tokenids
{
IDANY = lex::min_token_id + 10
};
//]
//[wcp_token_definition
// Lexer definition: recognizes three token types — words, newlines, and
// any other single character. NOTE: the 'WORD' pattern must be registered
// (add_pattern) before it is referenced via "{WORD}" below.
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
word_count_tokens()
{
// define patterns (lexer macros) to be used during token definition
// below
this->self.add_pattern
("WORD", "[^ \t\n]+")
;
// define tokens and associate them with the lexer
word = "{WORD}"; // reference the pattern 'WORD' as defined above
// this lexer will recognize 3 token types: words, newlines, and
// everything else
this->self.add
(word) // no token id is needed here
('\n') // characters are usable as tokens as well
(".", IDANY) // string literals will not be escaped by the library
;
}
// the token 'word' exposes the matched string as its parser attribute
lex::token_def<std::string> word;
};
//]
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
// Grammar definition: consumes the token stream produced by
// word_count_tokens and counts characters (c), words (w) and lines (l)
// via Phoenix semantic actions; the counters are read by the caller
// after parsing.
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
word_count_grammar(TokenDef const& tok)
: word_count_grammar::base_type(start)
, c(0), w(0), l(0)
{
using boost::phoenix::ref;
using boost::phoenix::size;
// a word bumps the word count and adds its length (size of the
// matched string attribute _1) to the character count; a newline is
// one character and one line; any other token is a single character
start = *( tok.word [++ref(w), ref(c) += size(_1)]
| lit('\n') [++ref(c), ++ref(l)]
| qi::token(IDANY) [++ref(c)]
)
;
}
// counters: characters, words, lines (exposed to the caller)
std::size_t c, w, l;
qi::rule<Iterator> start;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wcp_main
// Driver: reads the input file (default "word_count.input", or argv[1]),
// runs the lexer + grammar over it, and prints line/word/character counts.
// Returns 0 on success, 1 if parsing fails.
int main(int argc, char* argv[])
{
    /*< Define the token type to be used: `std::string` is available as the
        type of the token attribute
    >*/ typedef lex::lexertl::token<
            char const*, boost::mpl::vector<std::string>
        > token_type;

    /*< Define the lexer type to use implementing the state machine
    >*/ typedef lex::lexertl::lexer<token_type> lexer_type;

    /*< Define the iterator type exposed by the lexer type
    >*/ typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // Our lexer
    word_count_grammar<iterator_type> g (word_count);  // Our parser

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

    /*< Parsing is done based on the token stream, not the character
        stream read from the input. The function `tokenize_and_parse()` wraps
        the passed iterator range `[first, last)` by the lexical analyzer and
        uses its exposed iterators to parse the token stream.
    >*/ bool r = lex::tokenize_and_parse(first, last, word_count, g);

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        // report where parsing stopped and signal failure to the caller
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
        return 1;   // BUGFIX: previously returned 0 even on parse failure
    }
    return 0;
}
//]