| // string_token.hpp |
| // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) |
| // |
| // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| #ifndef BOOST_LEXER_STRING_TOKEN_HPP |
| #define BOOST_LEXER_STRING_TOKEN_HPP |
| |
| #include <algorithm> |
| #include "size_t.hpp" |
| #include "consts.hpp" // num_chars, num_wchar_ts |
| #include <string> |
| |
| namespace boost |
| { |
| namespace lexer |
| { |
| template<typename CharT> |
| struct basic_string_token |
| { |
| typedef std::basic_string<CharT> string; |
| |
| bool _negated; |
| string _charset; |
| |
| basic_string_token () : |
| _negated (false) |
| { |
| } |
| |
| basic_string_token (const bool negated_, const string &charset_) : |
| _negated (negated_), |
| _charset (charset_) |
| { |
| } |
| |
| void remove_duplicates () |
| { |
| const CharT *start_ = _charset.c_str (); |
| const CharT *end_ = start_ + _charset.size (); |
| |
| // Optimisation for very large charsets: |
| // sorting via pointers is much quicker than |
| // via iterators... |
| std::sort (const_cast<CharT *> (start_), const_cast<CharT *> (end_)); |
| _charset.erase (std::unique (_charset.begin (), _charset.end ()), |
| _charset.end ()); |
| } |
| |
| void normalise () |
| { |
| const std::size_t max_chars_ = sizeof (CharT) == 1 ? |
| num_chars : num_wchar_ts; |
| |
| if (_charset.length () == max_chars_) |
| { |
| _negated = !_negated; |
| #if defined _MSC_VER && _MSC_VER <= 1200 |
| _charset.erase (); |
| #else |
| _charset.clear (); |
| #endif |
| } |
| else if (_charset.length () > max_chars_ / 2) |
| { |
| negate (); |
| } |
| } |
| |
| void negate () |
| { |
| const std::size_t max_chars_ = sizeof (CharT) == 1 ? |
| num_chars : num_wchar_ts; |
| CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0; |
| string temp_; |
| const CharT *curr_ = _charset.c_str (); |
| const CharT *chars_end_ = curr_ + _charset.size (); |
| |
| _negated = !_negated; |
| temp_.resize (max_chars_ - _charset.size ()); |
| |
| CharT *ptr_ = const_cast<CharT *> (temp_.c_str ()); |
| std::size_t i_ = 0; |
| |
| while (curr_ < chars_end_) |
| { |
| while (*curr_ > curr_char_) |
| { |
| *ptr_ = curr_char_; |
| ++ptr_; |
| ++curr_char_; |
| ++i_; |
| } |
| |
| ++curr_char_; |
| ++curr_; |
| ++i_; |
| } |
| |
| for (; i_ < max_chars_; ++i_) |
| { |
| *ptr_ = curr_char_; |
| ++ptr_; |
| ++curr_char_; |
| } |
| |
| _charset = temp_; |
| } |
| |
| bool operator < (const basic_string_token &rhs_) const |
| { |
| return _negated < rhs_._negated || |
| (_negated == rhs_._negated && _charset < rhs_._charset); |
| } |
| |
| bool empty () const |
| { |
| return _charset.empty () && !_negated; |
| } |
| |
| bool any () const |
| { |
| return _charset.empty () && _negated; |
| } |
| |
| void clear () |
| { |
| _negated = false; |
| #if defined _MSC_VER && _MSC_VER <= 1200 |
| _charset.erase (); |
| #else |
| _charset.clear (); |
| #endif |
| } |
| |
| void intersect (basic_string_token &rhs_, basic_string_token &overlap_) |
| { |
| if ((any () && rhs_.any ()) || (_negated == rhs_._negated && |
| !any () && !rhs_.any ())) |
| { |
| intersect_same_types (rhs_, overlap_); |
| } |
| else |
| { |
| intersect_diff_types (rhs_, overlap_); |
| } |
| } |
| |
| static void escape_char (const CharT ch_, string &out_) |
| { |
| switch (ch_) |
| { |
| case '\0': |
| out_ += '\\'; |
| out_ += '0'; |
| break; |
| case '\a': |
| out_ += '\\'; |
| out_ += 'a'; |
| break; |
| case '\b': |
| out_ += '\\'; |
| out_ += 'b'; |
| break; |
| case 27: |
| out_ += '\\'; |
| out_ += 'x'; |
| out_ += '1'; |
| out_ += 'b'; |
| break; |
| case '\f': |
| out_ += '\\'; |
| out_ += 'f'; |
| break; |
| case '\n': |
| out_ += '\\'; |
| out_ += 'n'; |
| break; |
| case '\r': |
| out_ += '\\'; |
| out_ += 'r'; |
| break; |
| case '\t': |
| out_ += '\\'; |
| out_ += 't'; |
| break; |
| case '\v': |
| out_ += '\\'; |
| out_ += 'v'; |
| break; |
| case '\\': |
| out_ += '\\'; |
| out_ += '\\'; |
| break; |
| case '"': |
| out_ += '\\'; |
| out_ += '"'; |
| break; |
| case '\'': |
| out_ += '\\'; |
| out_ += '\''; |
| break; |
| default: |
| { |
| if (ch_ < 32 && ch_ >= 0) |
| { |
| std::basic_stringstream<CharT> ss_; |
| |
| out_ += '\\'; |
| out_ += 'x'; |
| ss_ << std::hex << |
| static_cast<std::size_t> (ch_); |
| out_ += ss_.str (); |
| } |
| else |
| { |
| out_ += ch_; |
| } |
| |
| break; |
| } |
| } |
| } |
| |
| private: |
| void intersect_same_types (basic_string_token &rhs_, |
| basic_string_token &overlap_) |
| { |
| if (any ()) |
| { |
| clear (); |
| overlap_._negated = true; |
| rhs_.clear (); |
| } |
| else |
| { |
| typename string::iterator iter_ = _charset.begin (); |
| typename string::iterator end_ = _charset.end (); |
| typename string::iterator rhs_iter_ = rhs_._charset.begin (); |
| typename string::iterator rhs_end_ = rhs_._charset.end (); |
| |
| overlap_._negated = _negated; |
| |
| while (iter_ != end_ && rhs_iter_ != rhs_end_) |
| { |
| if (*iter_ < *rhs_iter_) |
| { |
| ++iter_; |
| } |
| else if (*iter_ > *rhs_iter_) |
| { |
| ++rhs_iter_; |
| } |
| else |
| { |
| overlap_._charset += *iter_; |
| iter_ = _charset.erase (iter_); |
| end_ = _charset.end (); |
| rhs_iter_ = rhs_._charset.erase (rhs_iter_); |
| rhs_end_ = rhs_._charset.end (); |
| } |
| } |
| |
| if (_negated) |
| { |
| // duplicates already merged, so safe to merge |
| // using std lib. |
| |
| // src, dest |
| merge (_charset, overlap_._charset); |
| // duplicates already merged, so safe to merge |
| // using std lib. |
| |
| // src, dest |
| merge (rhs_._charset, overlap_._charset); |
| _negated = false; |
| rhs_._negated = false; |
| std::swap (_charset, rhs_._charset); |
| normalise (); |
| overlap_.normalise (); |
| rhs_.normalise (); |
| } |
| else if (!overlap_._charset.empty ()) |
| { |
| normalise (); |
| overlap_.normalise (); |
| rhs_.normalise (); |
| } |
| } |
| } |
| |
| void intersect_diff_types (basic_string_token &rhs_, |
| basic_string_token &overlap_) |
| { |
| if (any ()) |
| { |
| intersect_any (rhs_, overlap_); |
| } |
| else if (_negated) |
| { |
| intersect_negated (rhs_, overlap_); |
| } |
| else // _negated == false |
| { |
| intersect_charset (rhs_, overlap_); |
| } |
| } |
| |
| void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_) |
| { |
| if (rhs_._negated) |
| { |
| rhs_.intersect_negated (*this, overlap_); |
| } |
| else // rhs._negated == false |
| { |
| rhs_.intersect_charset (*this, overlap_); |
| } |
| } |
| |
| void intersect_negated (basic_string_token &rhs_, |
| basic_string_token &overlap_) |
| { |
| if (rhs_.any ()) |
| { |
| overlap_._negated = true; |
| overlap_._charset = _charset; |
| rhs_._negated = false; |
| rhs_._charset = _charset; |
| clear (); |
| } |
| else // rhs._negated == false |
| { |
| rhs_.intersect_charset (*this, overlap_); |
| } |
| } |
| |
| void intersect_charset (basic_string_token &rhs_, |
| basic_string_token &overlap_) |
| { |
| if (rhs_.any ()) |
| { |
| overlap_._charset = _charset; |
| rhs_._negated = true; |
| rhs_._charset = _charset; |
| clear (); |
| } |
| else // rhs_._negated == true |
| { |
| typename string::iterator iter_ = _charset.begin (); |
| typename string::iterator end_ = _charset.end (); |
| typename string::iterator rhs_iter_ = rhs_._charset.begin (); |
| typename string::iterator rhs_end_ = rhs_._charset.end (); |
| |
| while (iter_ != end_ && rhs_iter_ != rhs_end_) |
| { |
| if (*iter_ < *rhs_iter_) |
| { |
| overlap_._charset += *iter_; |
| rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_); |
| ++rhs_iter_; |
| rhs_end_ = rhs_._charset.end (); |
| iter_ = _charset.erase (iter_); |
| end_ = _charset.end (); |
| } |
| else if (*iter_ > *rhs_iter_) |
| { |
| ++rhs_iter_; |
| } |
| else |
| { |
| ++iter_; |
| ++rhs_iter_; |
| } |
| } |
| |
| if (iter_ != end_) |
| { |
| // nothing bigger in rhs_ than iter_, |
| // so safe to merge using std lib. |
| string temp_ (iter_, end_); |
| |
| // src, dest |
| merge (temp_, overlap_._charset); |
| _charset.erase (iter_, end_); |
| } |
| |
| if (!overlap_._charset.empty ()) |
| { |
| merge (overlap_._charset, rhs_._charset); |
| // possible duplicates, so check for any and erase. |
| rhs_._charset.erase (std::unique (rhs_._charset.begin (), |
| rhs_._charset.end ()), rhs_._charset.end ()); |
| normalise (); |
| overlap_.normalise (); |
| rhs_.normalise (); |
| } |
| } |
| } |
| |
| void merge (string &src_, string &dest_) |
| { |
| string tmp_ (src_.size () + dest_.size (), 0); |
| |
| std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (), |
| tmp_.begin ()); |
| dest_ = tmp_; |
| } |
| }; |
| } |
| } |
| |
| #endif |