blob: e6d084536a144872e9ac21c1202a8ee4973a171c [file] [log] [blame]
/*=============================================================================
Boost.Wave: A Standard compliant C++ preprocessor library
Xpressive based generic lexer
http://www.boost.org/
Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#if !defined(XPRESSIVE_LEXER_HPP)
#define XPRESSIVE_LEXER_HPP
#include <string>
#include <vector>
#include <utility>
#include <algorithm>
#include <boost/detail/iterator.hpp>
#include <boost/xpressive/xpressive.hpp>
namespace boost {
namespace wave {
namespace cpplexer {
namespace xlex {
///////////////////////////////////////////////////////////////////////////////
template <
    typename Iterator = char const*,
    typename Token = int,
    typename Callback = bool (*)(
        Iterator const&, Iterator&, Iterator const&, Token const&)
>
class xpressive_lexer
{
private:
    // character and string types derived from the iterator's value type
    typedef typename boost::detail::iterator_traits<Iterator>::value_type
        char_type;
    typedef std::basic_string<char_type> string_type;

    // A single lexer rule: the pattern text, the token id reported on a
    // match, the compiled form of the pattern and an optional callback.
    struct regex_info
    {
        typedef boost::xpressive::basic_regex<Iterator> regex_type;

        string_type str;        // pattern source text
        Token token;            // id returned when the pattern matches
        regex_type regex;       // 'str' compiled by xpressive
        Callback callback;      // invoked after a match (see next_token)

        // The pattern is compiled right here, at construction time.
        regex_info(string_type const& pattern, Token const& id,
                Callback const& cb)
          : str(pattern), token(id),
            regex(regex_type::compile(pattern)),
            callback(cb)
        {}

        // rules compare by their token id
        friend bool operator< (regex_info const& left, regex_info const& right)
        {
            return left.token < right.token;
        }
    };

    typedef std::vector<regex_info> regex_list_type;

public:
    typedef Callback callback_type;

    xpressive_lexer() {}

    // Register a regex with the lexer. 'id' is the token value handed
    // back by next_token when 'regex' matches; 'cb' defaults to a
    // value-initialized Callback.
    void register_regex(string_type const& regex, Token const& id,
        Callback const& cb = Callback());

    // Match the input at 'first' against the registered regexes and return
    // the id of the recognized token; 'first' is moved past the match and
    // 'token' receives the matched text.
    Token next_token(Iterator &first, Iterator const& last, string_type& token);

private:
    regex_list_type regex_list;     // all registered rules
};
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Token, typename Callback>
inline void
xpressive_lexer<Iterator, Token, Callback>::register_regex(
    string_type const& regex, Token const& id, Callback const& cb)
{
    // Build the rule (this compiles the pattern) and append it to the
    // list of registered rules.
    this->regex_list.push_back(regex_info(regex, id, cb));
}
///////////////////////////////////////////////////////////////////////////////
// Recognize the next token at the current input position.
//
//  first   [in/out] current input position; advanced past every match
//          (a callback may move it further, it receives 'first' by
//          non-const reference)
//  last    [in] end of the input
//  token   [out] text of the most recent match; left untouched when
//          nothing has matched
//
// The registered regexes are tried in registration order and the first one
// matching at 'first' wins. If that rule has a non-null callback which
// returns true, the match is skipped and scanning restarts at the new
// position. Returns the token id of the matching rule, or Token(-1) if no
// regex matches.
template <typename Iterator, typename Token, typename Callback>
inline Token
xpressive_lexer<Iterator, Token, Callback>::next_token(
    Iterator &first, Iterator const& last, string_type& token)
{
    namespace xpressive = boost::xpressive;
    typedef typename regex_list_type::iterator iterator;

    xpressive::match_results<Iterator> regex_result;

    // Skipped tokens are handled by restarting the scan in this loop rather
    // than by recursion, so a long run of skipped tokens cannot exhaust the
    // call stack.
    for (;;)
    {
        bool skipped = false;

        for (iterator it = regex_list.begin(), end = regex_list.end();
             it != end; ++it)
        {
            // match_continuous: the match has to start exactly at 'first'
            if (!xpressive::regex_search(first, last, regex_result,
                    (*it).regex,
                    xpressive::regex_constants::match_continuous))
            {
                continue;
            }

            Iterator saved = first;
            Token rval = (*it).token;

            std::advance(first, regex_result.length());
            token = string_type(saved, first);

            if (NULL != (*it).callback) {
                // execute corresponding callback
                if ((*it).callback(saved, first, last, (*it).token))
                    skipped = true;
            }
            if (!skipped)
                return rval;

            // 'it' is not touched after the callback has run; rescan the
            // whole rule list from the new input position
            break;
        }

        if (!skipped)
            return Token(-1);   // TODO: change this to use token_traits<Token>
    }
}
///////////////////////////////////////////////////////////////////////////////
}}}} // boost::wave::cpplexer::xlex
#endif // !defined(XPRESSIVE_LEXER_HPP)