| // Copyright (c) 2001-2010 Hartmut Kaiser |
| // |
| // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| // The purpose of this example is to show, how it is possible to use a lexer |
| // token definition for two purposes: |
| // |
| // . To generate C++ code implementing a static lexical analyzer allowing |
| // to recognize all defined tokens |
| // . To integrate the generated C++ lexer into the /Spirit/ framework. |
| // |
| |
| // #define BOOST_SPIRIT_LEXERTL_DEBUG |
| #define BOOST_VARIANT_MINIMIZE_SIZE |
| |
| #include <boost/config/warning_disable.hpp> |
| #include <boost/spirit/include/qi.hpp> |
| //[wc_static_include |
| #include <boost/spirit/include/lex_static_lexertl.hpp> |
| //] |
| #include <boost/spirit/include/phoenix_operator.hpp> |
| #include <boost/spirit/include/phoenix_statement.hpp> |
| #include <boost/spirit/include/phoenix_container.hpp> |
| |
| #include <iostream> |
| #include <string> |
| |
| #include "../example.hpp" |
| #include "word_count_tokens.hpp" // token definition |
| #include "word_count_static.hpp" // generated tokenizer |
| |
| using namespace boost::spirit; |
| using namespace boost::spirit::ascii; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // Grammar definition |
| /////////////////////////////////////////////////////////////////////////////// |
| //[wc_static_grammar |
| // This is an ordinary grammar definition following the rules defined by |
| // Spirit.Qi. There is nothing specific about it, except it gets the token |
| // definition class instance passed to the constructor to allow accessing the |
| // embedded token_def<> instances. |
| template <typename Iterator> |
| struct word_count_grammar : qi::grammar<Iterator> |
| { |
| template <typename TokenDef> |
| word_count_grammar(TokenDef const& tok) |
| : word_count_grammar::base_type(start) |
| , c(0), w(0), l(0) |
| { |
| using boost::phoenix::ref; |
| using boost::phoenix::size; |
| |
| // associate the defined tokens with the lexer, at the same time |
| // defining the actions to be executed |
| start = *( tok.word [ ++ref(w), ref(c) += size(_1) ] |
| | lit('\n') [ ++ref(l), ++ref(c) ] |
| | qi::token(IDANY) [ ++ref(c) ] |
| ) |
| ; |
| } |
| |
| std::size_t c, w, l; // counter for characters, words, and lines |
| qi::rule<Iterator> start; |
| }; |
| //] |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| //[wc_static_main |
| int main(int argc, char* argv[]) |
| { |
| // Define the token type to be used: 'std::string' is available as the type |
| // of the token value. |
| typedef lex::lexertl::token< |
| char const*, boost::mpl::vector<std::string> |
| > token_type; |
| |
| // Define the lexer type to be used as the base class for our token |
| // definition. |
| // |
| // This is the only place where the code is different from an equivalent |
| // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of |
| // the `lexertl::lexer<>` as the base class for our token defintion type. |
| // |
| // As we specified the suffix "wc" while generating the static tables we |
| // need to pass the type lexertl::static_::lexer_wc as the second template |
| // parameter below (see word_count_generate.cpp). |
| typedef lex::lexertl::static_lexer< |
| token_type, lex::lexertl::static_::lexer_wc |
| > lexer_type; |
| |
| // Define the iterator type exposed by the lexer. |
| typedef word_count_tokens<lexer_type>::iterator_type iterator_type; |
| |
| // Now we use the types defined above to create the lexer and grammar |
| // object instances needed to invoke the parsing process. |
| word_count_tokens<lexer_type> word_count; // Our lexer |
| word_count_grammar<iterator_type> g (word_count); // Our parser |
| |
| // Read in the file into memory. |
| std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); |
| char const* first = str.c_str(); |
| char const* last = &first[str.size()]; |
| |
| // Parsing is done based on the the token stream, not the character stream. |
| bool r = lex::tokenize_and_parse(first, last, word_count, g); |
| |
| if (r) { // success |
| std::cout << "lines: " << g.l << ", words: " << g.w |
| << ", characters: " << g.c << "\n"; |
| } |
| else { |
| std::string rest(first, last); |
| std::cerr << "Parsing failed\n" << "stopped at: \"" |
| << rest << "\"\n"; |
| } |
| return 0; |
| } |
| //] |