| [/ |
| Copyright 2006-2007 John Maddock. |
| Distributed under the Boost Software License, Version 1.0. |
| (See accompanying file LICENSE_1_0.txt or copy at |
| http://www.boost.org/LICENSE_1_0.txt). |
| ] |
| |
| |
| [section:regex_replace regex_replace] |
| |
| #include <boost/regex.hpp> |
| |
| The algorithm [regex_replace] searches through a string finding all the |
| matches to the regular expression: for each match it then calls |
| [match_results_format] to format the string and sends the result to the |
| output iterator. Sections of text that do not match are copied to the |
| output unchanged only if the /flags/ parameter does not have the |
| flag `format_no_copy` set. If the flag `format_first_only` is set then |
| only the first occurrence is replaced rather than all occurrences. |
| |
| template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter> |
| OutputIterator regex_replace(OutputIterator out, |
| BidirectionalIterator first, |
| BidirectionalIterator last, |
| const basic_regex<charT, traits>& e, |
| Formatter fmt, |
| match_flag_type flags = match_default); |
| |
| template <class traits, class charT, class Formatter> |
| basic_string<charT> regex_replace(const basic_string<charT>& s, |
| const basic_regex<charT, traits>& e, |
| Formatter fmt, |
| match_flag_type flags = match_default); |
| |
| |
| [h4 Description] |
| |
| template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter> |
| OutputIterator regex_replace(OutputIterator out, |
| BidirectionalIterator first, |
| BidirectionalIterator last, |
| const basic_regex<charT, traits>& e, |
| Formatter fmt, |
| match_flag_type flags = match_default); |
| |
| Enumerates all the occurrences of expression /e/ in the sequence \[first, last), |
| replacing each occurrence with the string that results from merging the |
| match found with the format string /fmt/, and copies the resulting string to /out/. |
| In the case that /fmt/ is a unary, binary or ternary function object, then the |
| character sequence generated by that object is copied unchanged to the output when performing |
| a substitution. |
| |
| If the flag `format_no_copy` is set in /flags/ then unmatched sections of |
| text are not copied to output. |
| |
| If the flag `format_first_only` is set in /flags/ then only the first |
| occurrence of /e/ is replaced. |
| |
| The manner in which the format string /fmt/ is interpreted, and the |
| rules used for finding matches, are determined by the flags set in /flags/: |
| see [match_flag_type]. |
| |
| [*Requires] |
| The type `Formatter` must be either a pointer to a null-terminated string |
| of type `char_type[]`, or be a container of `char_type`'s (for example |
| `std::basic_string<char_type>`) or be a unary, binary or ternary functor |
| that computes the replacement string from a function call: either |
| `fmt(what)` which must return a container of `char_type`'s to be used as the |
| replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of |
| which write the replacement text to `*out`, and then return the new |
| OutputIterator position. In each case `what` is the [match_results] object |
| that represents the match found. Note that if the formatter is a functor, then it is |
| ['passed by value]: users that want to pass function objects with internal state |
| might want to use [@../../../../doc/html/ref.html Boost.Ref] to wrap the object so |
| that it's passed by reference. |
| |
| [*Effects]: Constructs a [regex_iterator] object: |
| |
| regex_iterator<BidirectionalIterator, charT, traits, Allocator> |
| i(first, last, e, flags), |
| |
| and uses /i/ to enumerate through all of the matches /m/ of type |
| [match_results] `<BidirectionalIterator>` that occur within the sequence |
| \[first, last). |
| |
| If no such matches are found and |
| |
| !(flags & format_no_copy) |
| |
| then calls |
| |
| std::copy(first, last, out). |
| |
| Otherwise, for each match found, if |
| |
| !(flags & format_no_copy) |
| |
| calls |
| |
| std::copy(m.prefix().first, m.prefix().last, out), |
| |
| and then calls |
| |
| m.format(out, fmt, flags). |
| |
| Finally if |
| |
| !(flags & format_no_copy) |
| |
| calls |
| |
| std::copy(last_m.suffix().first, last_m.suffix().last, out) |
| |
| where /last_m/ is a copy of the last match found. |
| |
| If `flags & format_first_only` is non-zero then only the first match found |
| is replaced. |
| |
| [*Throws]: `std::runtime_error` if the complexity of matching the expression |
| against an N character string begins to exceed O(N[super 2]), or if the |
| program runs out of stack space while matching the expression (if Boost.Regex is |
| configured in recursive mode), or if the matcher exhausts its permitted |
| memory allocation (if Boost.Regex is configured in non-recursive mode). |
| |
| [*Returns]: out. |
| |
| template <class traits, class charT, class Formatter> |
| basic_string<charT> regex_replace(const basic_string<charT>& s, |
| const basic_regex<charT, traits>& e, |
| Formatter fmt, |
| match_flag_type flags = match_default); |
| |
| [*Requires] |
| The type `Formatter` must be either a pointer to a null-terminated string |
| of type `char_type[]`, or be a container of `char_type`'s (for example |
| `std::basic_string<char_type>`) or be a unary, binary or ternary functor |
| that computes the replacement string from a function call: either |
| `fmt(what)` which must return a container of `char_type`'s to be used as the |
| replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of |
| which write the replacement text to `*out`, and then return the new |
| OutputIterator position. In each case `what` is the [match_results] object |
| that represents the match found. |
| |
| [*Effects]: Constructs an object `basic_string<charT> result`, calls |
| `regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags)`, |
| and then returns `result`. |
| |
| [h4 Examples] |
| |
| The following example takes C/C++ source code as input, and outputs |
| syntax highlighted HTML code. |
| |
| #include <fstream> |
| #include <sstream> |
| #include <string> |
| #include <iterator> |
| #include <boost/regex.hpp> |
| #include <fstream> |
| #include <iostream> |
| |
| // purpose: |
| // takes the contents of a file and transforms them into |
| // syntax highlighted code in html format |
| |
| // Compiled expressions shared by main(): e1 drives the syntax highlighting |
| // pass, e2 drives the preliminary pass that runs before it. |
| boost::regex e1; |
| boost::regex e2; |
| |
| // Pattern and format text for the two passes; defined after main(). |
| extern const char* pre_expression; |
| extern const char* pre_format; |
| extern const char* expression_text; |
| extern const char* format_string; |
| |
| // Fixed HTML written before and after each converted file. |
| extern const char* header_text; |
| extern const char* footer_text; |
| |
| // Reads every remaining character of the stream into a string. |
| // |
| // s:  receives the characters read; any previous contents are discarded. |
| // is: stream to read from; reading continues until a read fails. |
| void load_file(std::string& s, std::istream& is) |
| { |
|    s.erase(); |
|    // in_avail() is only a hint and may be zero or negative (for example -1 |
|    // when nothing can be read). A negative value converted to an unsigned |
|    // size would make reserve() throw std::length_error, so the hint is used |
|    // only when it is positive. |
|    if(is.rdbuf()) |
|    { |
|       const std::streamsize hint = is.rdbuf()->in_avail(); |
|       if(hint > 0) |
|          s.reserve(static_cast<std::string::size_type>(hint)); |
|    } |
|    // append() grows the buffer as required, so no manual capacity |
|    // management is needed inside the loop. |
|    char c; |
|    while(is.get(c)) |
|       s.append(1, c); |
| } |
| |
| // Converts each file named on the command line into a syntax highlighted |
| // HTML file whose name is the input name with ".htm" appended. |
| // |
| // Returns 0 on success, or -1 if a file cannot be opened or an exception |
| // is thrown while processing. |
| int main(int argc, const char** argv) |
| { |
|    try{ |
|       e1.assign(expression_text); |
|       e2.assign(pre_expression); |
|       for(int i = 1; i < argc; ++i) |
|       { |
|          std::cout << "Processing file " << argv[i] << std::endl; |
|          std::ifstream fs(argv[i]); |
|          if(!fs) |
|          { |
|             std::cerr << "Unable to open input file " << argv[i] << std::endl; |
|             return -1; |
|          } |
|          std::string in; |
|          load_file(in, fs); |
|          std::string out_name(std::string(argv[i]) + std::string(".htm")); |
|          std::ofstream os(out_name.c_str()); |
|          if(!os) |
|          { |
|             std::cerr << "Unable to open output file " << out_name << std::endl; |
|             return -1; |
|          } |
|          os << header_text; |
|          // first pass: apply the e2 / pre_format substitution (which deals |
|          // with '<', '>', '&' and '\r') into a temporary string stream |
|          std::ostringstream t(std::ios::out | std::ios::binary); |
|          std::ostream_iterator<char, char> oi(t); |
|          boost::regex_replace(oi, in.begin(), in.end(), |
|             e2, pre_format, boost::match_default | boost::format_all); |
|          // second pass: apply the e1 / format_string substitution to the |
|          // result, writing the highlighted text to the output file |
|          std::string s(t.str()); |
|          std::ostream_iterator<char, char> out(os); |
|          boost::regex_replace(out, s.begin(), s.end(), |
|             e1, format_string, boost::match_default | boost::format_all); |
|          os << footer_text; |
|       } |
|    } |
|    catch(...) |
|    { |
|       // report the failure rather than exiting silently; the exit code |
|       // is the same as before |
|       std::cerr << "Error: an exception was thrown while processing the input" << std::endl; |
|       return -1; |
|    } |
|    return 0; |
| } |
| |
| // Matches the characters that are special in HTML: '<', '>' and '&' are |
| // sub-expressions 1 to 3; a carriage return matches outside any sub-expression. |
| const char* pre_expression = "(<)|(>)|(&)|\\r"; |
| // Replaces each special character with its HTML entity. A carriage return |
| // satisfies none of the three conditionals, so it is replaced by nothing. |
| const char* pre_format = "(?1&lt;)(?2&gt;)(?3&amp;)"; |
| |
| |
| const char* expression_text = |
| // sub-expression 1: preprocessor directives, including backslash-continued lines |
| "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|" |
| // sub-expression 2: comments, either '//' to end of line or '/*' up to the nearest '*/' |
| "(//[^\\n]*|/\\*.*?\\*/)|" |
| // sub-expression 3: numeric literals (hex, integer or floating point, with optional suffix) |
| "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+" |
| "(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" |
| // sub-expression 4: character and string literals, allowing backslash escapes |
| "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" |
| // sub-expression 5: keywords, including non-standard ones such as __declspec |
| "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" |
| "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" |
| "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" |
| "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" |
| "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" |
| "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" |
| "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" |
| "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" |
| "|using|virtual|void|volatile|wchar_t|while)\\>" |
| ; |
| |
| // One conditional per sub-expression number (1 to 5): each wraps the whole |
| // match ($&) in the markup chosen for that sub-expression. |
| const char* format_string = |
|    "(?1<font color=\"#008040\">$&</font>)" |
|    "(?2<I><font color=\"#000080\">$&</font></I>)" |
|    "(?3<font color=\"#0000A0\">$&</font>)" |
|    "(?4<font color=\"#0000FF\">$&</font>)" |
|    "(?5<B>$&</B>)" |
|    ; |
| |
| // HTML prologue written at the top of every generated file; it leaves a |
| // <PRE> element open to receive the highlighted source. |
| const char* header_text = |
|    "<HTML>\n<HEAD>\n" |
|    "<TITLE>Auto-generated html formatted source</TITLE>\n" |
|    "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n" |
|    "</HEAD>\n" |
|    "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n" |
|    // the &nbsp; entity stops the spacer paragraph from collapsing, and is |
|    // plain ASCII so it does not depend on the encoding of this source file |
|    "<P>&nbsp;</P>\n<PRE>"; |
| |
| // HTML epilogue written after the highlighted source: closes the <PRE>, |
| // <BODY> and <HTML> elements that the header text opens. |
| const char* footer_text = "</PRE>\n</BODY>\n</HTML>\n"; |
| |
| |
| [endsect] |
| |