| /*============================================================================= |
| Copyright (c) 2005 2006 Joel de Guzman |
| http://spirit.sourceforge.net/ |
| |
| Use, modification and distribution is subject to the Boost Software |
| License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
| http://www.boost.org/LICENSE_1_0.txt) |
| =============================================================================*/ |
| #include "post_process.hpp" |
| #include "utils.hpp" |
| #include <boost/spirit/include/classic_core.hpp> |
| #include <boost/bind.hpp> |
| #include <set> |
| #include <stack> |
| #include <cctype> |
| |
| namespace quickbook |
| { |
| namespace cl = boost::spirit::classic; |
| typedef std::string::const_iterator iter_type; |
| |
| struct printer |
| { |
| printer(std::string& out, int& current_indent, int linewidth) |
| : prev(0), out(out), current_indent(current_indent) , column(0) |
| , in_string(false), linewidth(linewidth) {} |
| |
| void indent() |
| { |
| BOOST_ASSERT(current_indent >= 0); // this should not happen! |
| for (int i = 0; i < current_indent; ++i) |
| out += ' '; |
| column = current_indent; |
| } |
| |
| void break_line() |
| { |
| out.erase(out.find_last_not_of(' ')+1); // trim trailing spaces |
| out += '\n'; |
| indent(); |
| } |
| |
| bool line_is_empty() const |
| { |
| for (iter_type i = out.end()-(column-current_indent); i != out.end(); ++i) |
| { |
| if (*i != ' ') |
| return false; |
| } |
| return true; |
| } |
| |
| void align_indent() |
| { |
| // make sure we are at the proper indent position |
| if (column != current_indent) |
| { |
| if (column > current_indent) |
| { |
| if (line_is_empty()) |
| { |
| // trim just enough trailing spaces down to current_indent position |
| out.erase(out.end()-(column-current_indent), out.end()); |
| column = current_indent; |
| } |
| else |
| { |
| // nope, line is not empty. do a hard CR |
| break_line(); |
| } |
| } |
| else |
| { |
| // will this happen? (i.e. column <= current_indent) |
| while (column != current_indent) |
| { |
| out += ' '; |
| ++column; |
| } |
| } |
| } |
| } |
| |
| void print(char ch) |
| { |
| // Print a char. Attempt to break the line if we are exceeding |
| // the target linewidth. The linewidth is not an absolute limit. |
| // There are many cases where a line will exceed the linewidth |
| // and there is no way to properly break the line. Preformatted |
| // code that exceeds the linewidth are examples. We cannot break |
| // preformatted code. We shall not attempt to be very strict with |
| // line breaking. What's more important is to have a reproducable |
| // output (i.e. processing two logically equivalent xml files |
| // results in two lexically equivalent xml files). *** pretty |
| // formatting is a secondary goal *** |
| |
| // Strings will occur only in tag attributes. Normal content |
| // will have " instead. We shall deal only with tag |
| // attributes here. |
| if (ch == '"') |
| in_string = !in_string; // don't break strings! |
| |
| if (!in_string && std::isspace(static_cast<unsigned char>(ch))) |
| { |
| // we can break spaces if they are not inside strings |
| if (!std::isspace(static_cast<unsigned char>(prev))) |
| { |
| if (column >= linewidth) |
| { |
| break_line(); |
| if (column == 0 && ch == ' ') |
| { |
| ++column; |
| out += ' '; |
| } |
| } |
| else |
| { |
| ++column; |
| out += ' '; |
| } |
| } |
| } |
| else |
| { |
| // we can break tag boundaries and stuff after |
| // delimiters if they are not inside strings |
| // and *only-if* the preceding char is a space |
| if (!in_string |
| && column >= linewidth |
| && (ch == '<' && std::isspace(static_cast<unsigned char>(prev)))) |
| break_line(); |
| out += ch; |
| ++column; |
| } |
| |
| prev = ch; |
| } |
| |
| void |
| print(iter_type f, iter_type l) |
| { |
| for (iter_type i = f; i != l; ++i) |
| print(*i); |
| } |
| |
| void |
| print_tag(iter_type f, iter_type l, bool is_flow_tag) |
| { |
| if (is_flow_tag) |
| { |
| print(f, l); |
| } |
| else |
| { |
| // This is not a flow tag, so, we're going to do a |
| // carriage return anyway. Let us remove extra right |
| // spaces. |
| std::string str(f, l); |
| BOOST_ASSERT(f != l); // this should not happen |
| iter_type i = str.end(); |
| while (i != str.begin() && std::isspace(static_cast<unsigned char>(*(i-1)))) |
| --i; |
| print(str.begin(), i); |
| } |
| } |
| |
| char prev; |
| std::string& out; |
| int& current_indent; |
| int column; |
| bool in_string; |
| int linewidth; |
| }; |
| |
| char const* block_tags_[] = |
| { |
| "author" |
| , "blockquote" |
| , "bridgehead" |
| , "callout" |
| , "calloutlist" |
| , "caution" |
| , "copyright" |
| , "entry" |
| , "important" |
| , "informaltable" |
| , "itemizedlist" |
| , "legalnotice" |
| , "listitem" |
| , "note" |
| , "orderedlist" |
| , "para" |
| , "row" |
| , "section" |
| , "simpara" |
| , "table" |
| , "tbody" |
| , "textobject" |
| , "tgroup" |
| , "thead" |
| , "tip" |
| , "variablelist" |
| , "varlistentry" |
| , "warning" |
| , "xml" |
| , "xi:include" |
| }; |
| |
| char const* doc_types_[] = |
| { |
| "book" |
| , "article" |
| , "library" |
| , "chapter" |
| , "part" |
| , "appendix" |
| , "preface" |
| , "qandadiv" |
| , "qandaset" |
| , "reference" |
| , "set" |
| }; |
| |
| struct tidy_compiler |
| { |
| tidy_compiler(std::string& out, int linewidth) |
| : out(out), current_indent(0), printer_(out, current_indent, linewidth) |
| { |
| static int const n_block_tags = sizeof(block_tags_)/sizeof(char const*); |
| for (int i = 0; i != n_block_tags; ++i) |
| { |
| block_tags.insert(block_tags_[i]); |
| } |
| |
| static int const n_doc_types = sizeof(doc_types_)/sizeof(char const*); |
| for (int i = 0; i != n_doc_types; ++i) |
| { |
| block_tags.insert(doc_types_[i]); |
| block_tags.insert(doc_types_[i] + std::string("info")); |
| block_tags.insert(doc_types_[i] + std::string("purpose")); |
| } |
| } |
| |
| bool is_flow_tag(std::string const& tag) |
| { |
| return block_tags.find(tag) == block_tags.end(); |
| } |
| |
| std::set<std::string> block_tags; |
| std::stack<std::string> tags; |
| std::string& out; |
| int current_indent; |
| printer printer_; |
| std::string current_tag; |
| }; |
| |
| struct tidy_grammar : cl::grammar<tidy_grammar> |
| { |
| tidy_grammar(tidy_compiler& state, int indent) |
| : state(state), indent(indent) {} |
| |
| template <typename Scanner> |
| struct definition |
| { |
| definition(tidy_grammar const& self) |
| { |
| tag = (cl::lexeme_d[+(cl::alpha_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)]; |
| |
| code = |
| "<programlisting>" |
| >> *(cl::anychar_p - "</programlisting>") |
| >> "</programlisting>" |
| ; |
| |
| // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ? |
| // It is there to preserve the space after the tag that is |
| // otherwise consumed by the cl::space_p skipper. |
| |
| escape = |
| cl::str_p("<!--quickbook-escape-prefix-->") >> |
| (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->"))) |
| [ |
| boost::bind(&tidy_grammar::do_escape, &self, _1, _2) |
| ] |
| >> cl::lexeme_d |
| [ |
| cl::str_p("<!--quickbook-escape-postfix-->") >> |
| (*cl::space_p) |
| [ |
| boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2) |
| ] |
| ] |
| ; |
| |
| start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; |
| start_end_tag = |
| '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p] |
| | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p] |
| | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p] |
| | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p] |
| ; |
| content = cl::lexeme_d[ +(cl::anychar_p - '<') ]; |
| end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; |
| |
| markup = |
| escape |
| | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)] |
| | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)] |
| | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)] |
| | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)] |
| | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)] |
| ; |
| |
| tidy = +markup; |
| } |
| |
| cl::rule<Scanner> const& |
| start() { return tidy; } |
| |
| cl::rule<Scanner> |
| tidy, tag, start_tag, start_end_tag, |
| content, end_tag, markup, code, escape; |
| }; |
| |
| void do_escape_post(iter_type f, iter_type l) const |
| { |
| for (iter_type i = f; i != l; ++i) |
| state.out += *i; |
| } |
| |
| void do_escape(iter_type f, iter_type l) const |
| { |
| while (f != l && std::isspace(*f)) |
| ++f; |
| for (iter_type i = f; i != l; ++i) |
| state.out += *i; |
| } |
| |
| void do_code(iter_type f, iter_type l) const |
| { |
| state.out += '\n'; |
| // print the string taking care of line |
| // ending CR/LF platform issues |
| for (iter_type i = f; i != l; ++i) |
| { |
| if (*i == '\n') |
| { |
| state.out += '\n'; |
| ++i; |
| if (i != l && *i != '\r') |
| state.out += *i; |
| } |
| else if (*i == '\r') |
| { |
| state.out += '\n'; |
| ++i; |
| if (i != l && *i != '\n') |
| state.out += *i; |
| } |
| else |
| { |
| state.out += *i; |
| } |
| } |
| state.out += '\n'; |
| state.printer_.indent(); |
| } |
| |
| void do_tag(iter_type f, iter_type l) const |
| { |
| state.current_tag = std::string(f, l); |
| } |
| |
| void do_start_end_tag(iter_type f, iter_type l) const |
| { |
| bool is_flow_tag = state.is_flow_tag(state.current_tag); |
| if (!is_flow_tag) |
| state.printer_.align_indent(); |
| state.printer_.print_tag(f, l, is_flow_tag); |
| if (!is_flow_tag) |
| state.printer_.break_line(); |
| } |
| |
| void do_start_tag(iter_type f, iter_type l) const |
| { |
| state.tags.push(state.current_tag); |
| bool is_flow_tag = state.is_flow_tag(state.current_tag); |
| if (!is_flow_tag) |
| state.printer_.align_indent(); |
| state.printer_.print_tag(f, l, is_flow_tag); |
| if (!is_flow_tag) |
| { |
| state.current_indent += indent; |
| state.printer_.break_line(); |
| } |
| } |
| |
| void do_content(iter_type f, iter_type l) const |
| { |
| state.printer_.print(f, l); |
| } |
| |
| void do_end_tag(iter_type f, iter_type l) const |
| { |
| bool is_flow_tag = state.is_flow_tag(state.tags.top()); |
| if (!is_flow_tag) |
| { |
| state.current_indent -= indent; |
| state.printer_.align_indent(); |
| } |
| state.printer_.print_tag(f, l, is_flow_tag); |
| if (!is_flow_tag) |
| state.printer_.break_line(); |
| state.tags.pop(); |
| } |
| |
| tidy_compiler& state; |
| int indent; |
| }; |
| |
| int post_process( |
| std::string const& in |
| , std::ostream& out |
| , int indent |
| , int linewidth) |
| { |
| if (indent == -1) |
| indent = 2; // set default to 2 |
| if (linewidth == -1) |
| linewidth = 80; // set default to 80 |
| |
| try |
| { |
| std::string tidy; |
| tidy_compiler state(tidy, linewidth); |
| tidy_grammar g(state, indent); |
| cl::parse_info<iter_type> r = parse(in.begin(), in.end(), g, cl::space_p); |
| if (r.full) |
| { |
| out << tidy; |
| return 0; |
| } |
| else |
| { |
| // fallback! |
| ::quickbook::detail::outerr("") |
| << "Warning: Post Processing Failed." |
| << std::endl; |
| out << in; |
| return 1; |
| } |
| } |
| |
| catch(...) |
| { |
| // fallback! |
| ::quickbook::detail::outerr("") |
| << "Post Processing Failed." |
| << std::endl; |
| out << in; |
| return 1; |
| } |
| } |
| } |
| |