| /*============================================================================= |
| Boost.Wave: A Standard compliant C++ preprocessor library |
| |
| Re2C based C++ lexer |
| |
| http://www.boost.org/ |
| |
| Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost |
| Software License, Version 1.0. (See accompanying file |
| LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| =============================================================================*/ |
| |
| #if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED) |
| #define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED |
| |
| #include <string> |
| #include <cstdio> |
| #include <cstdarg> |
| #if defined(BOOST_SPIRIT_DEBUG) |
| #include <iostream> |
| #endif // defined(BOOST_SPIRIT_DEBUG) |
| |
| #include <boost/concept_check.hpp> |
| #include <boost/assert.hpp> |
| #include <boost/spirit/include/classic_core.hpp> |
| |
| #include <boost/wave/wave_config.hpp> |
| #include <boost/wave/language_support.hpp> |
| #include <boost/wave/token_ids.hpp> |
| #include <boost/wave/util/file_position.hpp> |
| #include <boost/wave/cpplexer/validate_universal_char.hpp> |
| #include <boost/wave/cpplexer/cpplexer_exceptions.hpp> |
| #include <boost/wave/cpplexer/token_cache.hpp> |
| #include <boost/wave/cpplexer/convert_trigraphs.hpp> |
| |
| #include <boost/wave/cpplexer/cpp_lex_interface.hpp> |
| #include <boost/wave/cpplexer/re2clex/scanner.hpp> |
| #include <boost/wave/cpplexer/re2clex/cpp_re.hpp> |
| #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
| #include <boost/wave/cpplexer/detect_include_guards.hpp> |
| #endif |
| |
| #include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp> |
| |
| // this must occur after all of the includes and before any code appears |
| #ifdef BOOST_HAS_ABI_HEADERS |
| #include BOOST_ABI_PREFIX |
| #endif |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| namespace boost { |
| namespace wave { |
| namespace cpplexer { |
| namespace re2clex { |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| // encapsulation of the re2c based cpp lexer |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| template <typename IteratorT, |
| typename PositionT = boost::wave::util::file_position_type, |
| typename TokenT = lex_token<PositionT> > |
| class lexer |
| { |
| public: |
| typedef TokenT token_type; |
| typedef typename token_type::string_type string_type; |
| |
| lexer(IteratorT const &first, IteratorT const &last, |
| PositionT const &pos, boost::wave::language_support language_); |
| ~lexer(); |
| |
| token_type& get(token_type&); |
| void set_position(PositionT const &pos) |
| { |
| // set position has to change the file name and line number only |
| filename = pos.get_file(); |
| scanner.line = pos.get_line(); |
| // scanner.column = scanner.curr_column = pos.get_column(); |
| scanner.file_name = filename.c_str(); |
| } |
| #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
| bool has_include_guards(std::string& guard_name) const |
| { |
| return guards.detected(guard_name); |
| } |
| #endif |
| |
| // error reporting from the re2c generated lexer |
| static int report_error(Scanner const* s, int code, char const *, ...); |
| |
| private: |
| static char const *tok_names[]; |
| |
| Scanner scanner; |
| string_type filename; |
| string_type value; |
| bool at_eof; |
| boost::wave::language_support language; |
| #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
| include_guards<token_type> guards; |
| #endif |
| |
| #if BOOST_WAVE_SUPPORT_THREADING == 0 |
| static token_cache<string_type> const cache; |
| #else |
| token_cache<string_type> const cache; |
| #endif |
| }; |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // initialize cpp lexer |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| inline |
| lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first, |
| IteratorT const &last, PositionT const &pos, |
| boost::wave::language_support language_) |
| : filename(pos.get_file()), at_eof(false), language(language_) |
| #if BOOST_WAVE_SUPPORT_THREADING != 0 |
| , cache() |
| #endif |
| { |
| using namespace std; // some systems have memset in std |
| memset(&scanner, '\0', sizeof(Scanner)); |
| scanner.eol_offsets = aq_create(); |
| if (first != last) { |
| scanner.first = scanner.act = (uchar *)&(*first); |
| scanner.last = scanner.first + std::distance(first, last); |
| } |
| scanner.line = pos.get_line(); |
| scanner.column = scanner.curr_column = pos.get_column(); |
| scanner.error_proc = report_error; |
| scanner.file_name = filename.c_str(); |
| |
| #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 |
| scanner.enable_ms_extensions = true; |
| #else |
| scanner.enable_ms_extensions = false; |
| #endif |
| |
| #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0 |
| scanner.act_in_c99_mode = boost::wave::need_c99(language_); |
| #endif |
| |
| #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 |
| scanner.enable_import_keyword = !boost::wave::need_c99(language_); |
| #else |
| scanner.enable_import_keyword = false; |
| #endif |
| |
| scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_); |
| scanner.single_line_only = boost::wave::need_single_line(language_); |
| } |
| |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| inline |
| lexer<IteratorT, PositionT, TokenT>::~lexer() |
| { |
| using namespace std; // some systems have free in std |
| aq_terminate(scanner.eol_offsets); |
| free(scanner.bot); |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // get the next token from the input stream |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| inline TokenT& |
| lexer<IteratorT, PositionT, TokenT>::get(TokenT& result) |
| { |
| if (at_eof) |
| return result = token_type(); // return T_EOI |
| |
| unsigned int actline = scanner.line; |
| token_id id = token_id(scan(&scanner)); |
| |
| switch (static_cast<unsigned int>(id)) { |
| case T_IDENTIFIER: |
| // test identifier characters for validity (throws if invalid chars found) |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| if (!boost::wave::need_no_character_validation(language)) |
| impl::validate_identifier_name(value, actline, scanner.column, filename); |
| break; |
| |
| case T_STRINGLIT: |
| case T_CHARLIT: |
| // test literal characters for validity (throws if invalid chars found) |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| if (boost::wave::need_convert_trigraphs(language)) |
| value = impl::convert_trigraphs(value); |
| if (!boost::wave::need_no_character_validation(language)) |
| impl::validate_literal(value, actline, scanner.column, filename); |
| break; |
| |
| #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 |
| case T_PP_HHEADER: |
| case T_PP_QHEADER: |
| case T_PP_INCLUDE: |
| // convert to the corresponding ..._next token, if appropriate |
| { |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| |
| // Skip '#' and whitespace and see whether we find an 'include_next' here. |
| typename string_type::size_type start = value.find("include"); |
| if (value.compare(start, 12, "include_next", 12) == 0) |
| id = token_id(id | AltTokenType); |
| break; |
| } |
| #endif |
| |
| case T_LONGINTLIT: // supported in C99 and long_long mode |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| if (!boost::wave::need_long_long(language)) { |
| // syntax error: not allowed in C++ mode |
| BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal, |
| value.c_str(), actline, scanner.column, filename.c_str()); |
| } |
| break; |
| |
| case T_OCTALINT: |
| case T_DECIMALINT: |
| case T_HEXAINT: |
| case T_INTLIT: |
| case T_FLOATLIT: |
| case T_FIXEDPOINTLIT: |
| case T_CCOMMENT: |
| case T_CPPCOMMENT: |
| case T_SPACE: |
| case T_SPACE2: |
| case T_ANY: |
| case T_PP_NUMBER: |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| break; |
| |
| case T_EOF: |
| // T_EOF is returned as a valid token, the next call will return T_EOI, |
| // i.e. the actual end of input |
| at_eof = true; |
| value.clear(); |
| break; |
| |
| case T_OR_TRIGRAPH: |
| case T_XOR_TRIGRAPH: |
| case T_LEFTBRACE_TRIGRAPH: |
| case T_RIGHTBRACE_TRIGRAPH: |
| case T_LEFTBRACKET_TRIGRAPH: |
| case T_RIGHTBRACKET_TRIGRAPH: |
| case T_COMPL_TRIGRAPH: |
| case T_POUND_TRIGRAPH: |
| if (boost::wave::need_convert_trigraphs(language)) { |
| value = cache.get_token_value(BASEID_FROM_TOKEN(id)); |
| } |
| else { |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| } |
| break; |
| |
| case T_ANY_TRIGRAPH: |
| if (boost::wave::need_convert_trigraphs(language)) { |
| value = impl::convert_trigraph( |
| string_type((char const *)scanner.tok)); |
| } |
| else { |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| } |
| break; |
| |
| default: |
| if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) || |
| IS_CATEGORY(id, UnknownTokenType)) |
| { |
| value = string_type((char const *)scanner.tok, |
| scanner.cur-scanner.tok); |
| } |
| else { |
| value = cache.get_token_value(id); |
| } |
| break; |
| } |
| |
| // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl; |
| |
| // the re2c lexer reports the new line number for newline tokens |
| result = token_type(id, value, PositionT(filename, actline, scanner.column)); |
| |
| #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
| return guards.detect_guard(result); |
| #else |
| return result; |
| #endif |
| } |
| |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| inline int |
| lexer<IteratorT, PositionT, TokenT>::report_error(Scanner const *s, int errcode, |
| char const *msg, ...) |
| { |
| BOOST_ASSERT(0 != s); |
| BOOST_ASSERT(0 != msg); |
| |
| using namespace std; // some system have vsprintf in namespace std |
| |
| char buffer[200]; // should be large enough |
| va_list params; |
| va_start(params, msg); |
| vsprintf(buffer, msg, params); |
| va_end(params); |
| |
| BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line, |
| s->column, s->file_name); |
| // BOOST_UNREACHABLE_RETURN(0); |
| return 0; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| // lex_functor |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| template <typename IteratorT, |
| typename PositionT = boost::wave::util::file_position_type, |
| typename TokenT = typename lexer<IteratorT, PositionT>::token_type> |
| class lex_functor |
| : public lex_input_interface_generator<TokenT> |
| { |
| public: |
| typedef TokenT token_type; |
| |
| lex_functor(IteratorT const &first, IteratorT const &last, |
| PositionT const &pos, boost::wave::language_support language) |
| : re2c_lexer(first, last, pos, language) |
| {} |
| virtual ~lex_functor() {} |
| |
| // get the next token from the input stream |
| token_type& get(token_type& result) { return re2c_lexer.get(result); } |
| void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); } |
| #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
| bool has_include_guards(std::string& guard_name) const |
| { return re2c_lexer.has_include_guards(guard_name); } |
| #endif |
| |
| private: |
| lexer<IteratorT, PositionT, TokenT> re2c_lexer; |
| }; |
| |
| #if BOOST_WAVE_SUPPORT_THREADING == 0 |
| /////////////////////////////////////////////////////////////////////////////// |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const |
| lexer<IteratorT, PositionT, TokenT>::cache = |
| token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>(); |
| #endif |
| |
| } // namespace re2clex |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp) |
| // should be defined inline, if the lex_functor shouldn't be instantiated |
| // separately from the lex_iterator. |
| // |
| // Separate (explicit) instantiation helps to reduce compilation time. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0 |
| #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE |
| #else |
| #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline |
| #endif |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| // The 'new_lexer' function allows the opaque generation of a new lexer object. |
| // It is coupled to the iterator type to allow to decouple the lexer/iterator |
| // configurations at compile time. |
| // |
| // This function is declared inside the cpp_lex_token.hpp file, which is |
| // referenced by the source file calling the lexer and the source file, which |
| // instantiates the lex_functor. But is is defined here, so it will be |
| // instantiated only while compiling the source file, which instantiates the |
| // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere, |
| // this file (cpp_re2c_lexer.hpp) should be included only once. This allows |
| // to decouple the lexer interface from the lexer implementation and reduces |
| // compilation time. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| template <typename IteratorT, typename PositionT, typename TokenT> |
| BOOST_WAVE_RE2C_NEW_LEXER_INLINE |
| lex_input_interface<TokenT> * |
| new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first, |
| IteratorT const &last, PositionT const &pos, |
| boost::wave::language_support language) |
| { |
| using re2clex::lex_functor; |
| return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language); |
| } |
| |
| #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| } // namespace cpplexer |
| } // namespace wave |
| } // namespace boost |
| |
| // the suffix header occurs after all of the code |
| #ifdef BOOST_HAS_ABI_HEADERS |
| #include BOOST_ABI_SUFFIX |
| #endif |
| |
| #endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED) |