| // Copyright 2008 John Maddock |
| // |
| // Use, modification and distribution are subject to the |
| // Boost Software License, Version 1.0. |
| // (See accompanying file LICENSE_1_0.txt |
| // or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| #include "auto_index.hpp" |
| |
| bool need_defaults = true; |
| |
| void install_default_scanners() |
| { |
| need_defaults = false; |
| // |
| // Set the default scanners if they're not defined already: |
| // |
| file_scanner s; |
| s.type = "class_name"; |
| if(file_scanner_set.find(s) == file_scanner_set.end()) |
| { |
| add_file_scanner( |
| "class_name", // Index type |
| // Header file scanner regex: |
| // possibly leading whitespace: |
| "^[[:space:]]*" |
| // possible template declaration: |
| "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" |
| // class or struct: |
| "(class|struct)[[:space:]]*" |
| // leading declspec macros etc: |
| "(" |
| "\\<\\w+\\>" |
| "(" |
| "[[:blank:]]*\\([^)]*\\)" |
| ")?" |
| "[[:space:]]*" |
| ")*" |
| // the class name |
| "(\\<\\w*\\>)[[:space:]]*" |
| // template specialisation parameters |
| "(<[^;:{]+>)?[[:space:]]*" |
| // terminate in { or : |
| "(\\{|:[^;\\{()]*\\{)", |
| |
| "(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{", // Format string to create indexing regex. |
| "\\5", // Format string to create index term. |
| "", // Filter regex for section id's. |
| "" // Filter regex for filenames. |
| ); |
| } |
| |
| s.type = "typedef_name"; |
| if(file_scanner_set.find(s) == file_scanner_set.end()) |
| { |
| add_file_scanner( |
| "typedef_name", // Index type |
| "typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex |
| "typedef[^;]+\\\\<\\1\\\\>\\\\s*;", // Format string to create indexing regex. |
| "\\1", // Format string to create index term. |
| "", // Filter regex for section id's. |
| "" // Filter regex for filenames. |
| ); |
| } |
| |
| s.type = "macro_name"; |
| if(file_scanner_set.find(s) == file_scanner_set.end()) |
| { |
| add_file_scanner( |
| "macro_name", // Index type |
| "^\\s*#\\s*define\\s+(\\w+)", // scanner regex |
| "\\\\<\\1\\\\>", // Format string to create indexing regex. |
| "\\1", // Format string to create index term. |
| "", // Filter regex for section id's. |
| "" // Filter regex for filenames. |
| ); |
| } |
| |
| s.type = "function_name"; |
| if(file_scanner_set.find(s) == file_scanner_set.end()) |
| { |
| add_file_scanner( |
| "function_name", // Index type |
| "\\w+(?:\\s*<[^>]>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^\\)]*\\)\\s*[;{]", // scanner regex |
| "\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)?[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)", // Format string to create indexing regex. |
| "\\1", // Format string to create index term. |
| "", // Filter regex for section id's. |
| "" // Filter regex for filenames. |
| ); |
| } |
| } |
| |
//
// Helper to dump the remaining contents of a stream into a std::string.
//
// s  : receives the data; any previous contents are discarded.
// is : stream to read from. If its badbit is already set, s is left
//      empty and nothing is read. Otherwise the stream is read until
//      extraction fails, which leaves eofbit/failbit set on return.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad()) return;

   // in_avail() is only a hint and may be zero or negative (-1 means
   // "nothing available"). Passing a negative value to reserve() would
   // turn into an enormous size_type and throw std::length_error.
   const std::streamsize hint = is.rdbuf()->in_avail();
   if(hint > 0)
      s.reserve(static_cast<std::string::size_type>(hint));

   // Read in blocks; the string grows its own capacity as required.
   char buffer[4096];
   while(is.read(buffer, sizeof(buffer)) || (is.gcount() > 0))
      s.append(buffer, static_cast<std::string::size_type>(is.gcount()));
}
//
// Helper to convert a string from an external source into valid XML:
// the characters & < > and " are replaced by the corresponding
// predefined entity references, every other character is copied
// through unchanged.
//
std::string escape_to_xml(const std::string& in)
{
   std::string result;
   result.reserve(in.size()); // the output is never shorter than the input
   for(std::string::size_type i = 0; i < in.size(); ++i)
   {
      switch(in[i])
      {
      case '&':
         result.append("&amp;");
         break;
      case '<':
         result.append("&lt;");
         break;
      case '>':
         result.append("&gt;");
         break;
      case '"':
         result.append("&quot;");
         break;
      default:
         result.append(1, in[i]);
      }
   }
   return result;
}
| // |
| // Scan a source file for things to index: |
| // |
| void scan_file(const std::string& file) |
| { |
| if(need_defaults) |
| install_default_scanners(); |
| if(verbose) |
| std::cout << "Scanning file... " << file << std::endl; |
| std::string text; |
| std::ifstream is(file.c_str()); |
| if(!is.peek() || !is.good()) |
| throw std::runtime_error(std::string("Unable to read from file: ") + file); |
| load_file(text, is); |
| |
| for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan) |
| { |
| bool need_debug = false; |
| if(!debug.empty() && regex_match(pscan->type, ::debug)) |
| { |
| need_debug = true; |
| std::cout << "Processing scanner " << pscan->type << " on file " << file << std::endl; |
| std::cout << "Scanner regex:" << pscan->scanner << std::endl; |
| std::cout << "Scanner formatter (search regex):" << pscan->format_string << std::endl; |
| std::cout << "Scanner formatter (index term):" << pscan->term_formatter << std::endl; |
| std::cout << "Scanner file name filter:" << pscan->file_name_filter << std::endl; |
| std::cout << "Scanner section id filter:" << pscan->section_filter << std::endl; |
| } |
| if(!pscan->file_name_filter.empty()) |
| { |
| if(!regex_match(file, pscan->file_name_filter)) |
| { |
| if(need_debug) |
| { |
| std::cout << "File failed to match file name filter, this file will be skipped..." << std::endl; |
| } |
| continue; // skip this file |
| } |
| } |
| if(verbose && !need_debug) |
| std::cout << "Scanning for type \"" << (*pscan).type << "\" ... " << std::endl; |
| boost::sregex_iterator i(text.begin(), text.end(), (*pscan).scanner), j; |
| while(i != j) |
| { |
| try |
| { |
| index_info info; |
| info.term = escape_to_xml(i->format(pscan->term_formatter)); |
| info.search_text = i->format(pscan->format_string); |
| info.category = pscan->type; |
| if(!pscan->section_filter.empty()) |
| info.search_id = pscan->section_filter; |
| std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info); |
| if(pos.second) |
| { |
| if(verbose || need_debug) |
| std::cout << "Indexing " << info.term << " as type " << info.category << std::endl; |
| if(need_debug) |
| std::cout << "Search regex will be: \"" << info.search_text << "\"" << |
| " ID constraint is: \"" << info.search_id << "\"" |
| << "Found text was: " << i->str() << std::endl; |
| if(pos.first->search_text != info.search_text) |
| { |
| // |
| // Merge the search terms: |
| // |
| const_cast<boost::regex&>(pos.first->search_text) = |
| "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")"; |
| } |
| if(pos.first->search_id != info.search_id) |
| { |
| // |
| // Merge the ID constraints: |
| // |
| const_cast<boost::regex&>(pos.first->search_id) = |
| "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")"; |
| } |
| } |
| } |
| catch(const boost::regex_error& e) |
| { |
| std::cerr << "Unable to create regular expression from found index term:\"" |
| << i->format(pscan->term_formatter) << "\" In file " << file << std::endl; |
| std::cerr << e.what() << std::endl; |
| } |
| catch(const std::exception& e) |
| { |
| std::cerr << "Unable to create index term:\"" |
| << i->format(pscan->term_formatter) << "\" In file " << file << std::endl; |
| std::cerr << e.what() << std::endl; |
| throw; |
| } |
| ++i; |
| } |
| } |
| } |
| // |
| // Scan a whole directory for files to search: |
| // |
| void scan_dir(const std::string& dir, const std::string& mask, bool recurse) |
| { |
| using namespace boost::filesystem; |
| boost::regex e(mask); |
| directory_iterator i(dir), j; |
| |
| while(i != j) |
| { |
| if(regex_match(i->path().filename().string(), e)) |
| { |
| scan_file(i->path().string()); |
| } |
| else if(recurse && is_directory(i->status())) |
| { |
| scan_dir(i->path().string(), mask, recurse); |
| } |
| ++i; |
| } |
| } |
//
// Remove quotes from a string: when s is at least two characters long
// and both begins and ends with a double quote, the enclosing pair is
// stripped; otherwise s is returned unchanged.
//
std::string unquote(const std::string& s)
{
   const std::string::size_type length = s.size();
   const bool quoted = (length >= 2) && (s[0] == '"') && (s[length - 1] == '"');
   if(!quoted)
      return s;
   return s.substr(1, length - 2);
}
| // |
| // Load and process a script file: |
| // |
| void process_script(const std::string& script) |
| { |
| static const boost::regex comment_parser( |
| "\\s*(?:#.*)?$" |
| ); |
| static const boost::regex scan_parser( |
| "!scan[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" |
| ); |
| static const boost::regex scan_dir_parser( |
| "!scan-path[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" |
| "[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" |
| "(?:" |
| "[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" |
| ")?\\s*" |
| ); |
| static const boost::regex entry_parser( |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" |
| "(?:" |
| "[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" |
| "(?:" |
| "[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" |
| "(?:" |
| "[[:space:]]+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" |
| ")?" |
| ")?" |
| ")?" |
| "[[:space:]]*"); |
| static const boost::regex rewrite_parser( |
| "!(rewrite-name|rewrite-id)\\s+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" |
| ); |
| static const boost::regex debug_parser( |
| "!debug\\s+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" |
| ); |
| static const boost::regex define_scanner_parser( |
| "!define-scanner\\s+" |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // type, index 1 |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // scanner regex, index 2 |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // format string, index 3 |
| "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // format string for name, index 4 |
| "(?:" |
| "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5 |
| "(?:" |
| "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6 |
| ")?" |
| ")?" |
| "\\s*" |
| ); |
| static const boost::regex error_parser("!.*"); |
| |
| if(verbose) |
| std::cout << "Processing script " << script << std::endl; |
| boost::smatch what; |
| std::string line; |
| std::ifstream is(script.c_str()); |
| if(is.bad() || !exists(boost::filesystem::path(script))) |
| { |
| throw std::runtime_error(std::string("Could not open script file: ") + script); |
| } |
| while(std::getline(is, line).good()) |
| { |
| if(regex_match(line, what, comment_parser)) |
| { |
| // Nothing to do here... |
| } |
| else if(regex_match(line, what, scan_parser)) |
| { |
| std::string f = unquote(what[1].str()); |
| if(!boost::filesystem::path(f).is_complete()) |
| { |
| if(prefix.size()) |
| { |
| boost::filesystem::path base(prefix); |
| base /= f; |
| f = base.string(); |
| } |
| else |
| { |
| boost::filesystem::path base(script); |
| base.remove_filename(); |
| base /= f; |
| f = base.string(); |
| } |
| } |
| if(!exists(boost::filesystem::path(f))) |
| throw std::runtime_error("Error the file requested for scanning does not exist: " + f); |
| scan_file(f); |
| } |
| else if(regex_match(line, what, debug_parser)) |
| { |
| debug = unquote(what[1].str()); |
| } |
| else if(regex_match(line, what, define_scanner_parser)) |
| { |
| add_file_scanner(unquote(what.str(1)), unquote(what.str(2)), unquote(what.str(3)), |
| unquote(what.str(4)), unquote(what.str(5)), unquote(what.str(6))); |
| } |
| else if(regex_match(line, what, scan_dir_parser)) |
| { |
| std::string d = unquote(what[1].str()); |
| std::string m = unquote(what[2].str()); |
| bool r = unquote(what[3].str()) == "true"; |
| if(!boost::filesystem::path(d).is_complete()) |
| { |
| if(prefix.size()) |
| { |
| boost::filesystem::path base(prefix); |
| base /= d; |
| d = base.string(); |
| } |
| else |
| { |
| boost::filesystem::path base(script); |
| base.remove_filename(); |
| base /= d; |
| d = base.string(); |
| } |
| } |
| if(verbose) |
| std::cout << "Scanning directory " << d << std::endl; |
| if(!exists(boost::filesystem::path(d))) |
| throw std::runtime_error("Error the path requested for scanning does not exist: " + d); |
| scan_dir(d, m, r); |
| } |
| else if(regex_match(line, what, rewrite_parser)) |
| { |
| bool id = what[1] == "rewrite-id"; |
| std::string a = unquote(what[2].str()); |
| std::string b = unquote(what[3].str()); |
| id_rewrite_list.push_back(id_rewrite_rule(a, b, id)); |
| } |
| else if(line.compare(0, 9, "!exclude ") == 0) |
| { |
| static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"); |
| boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j; |
| while(i != j) |
| { |
| index_info info; |
| info.term = escape_to_xml(unquote(*i)); |
| // Erase all entries that have a category in our scanner set, |
| // plus any entry with no category at all: |
| index_terms.erase(info); |
| for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan) |
| { |
| info.category = (*pscan).type; |
| index_terms.erase(info); |
| } |
| ++i; |
| } |
| } |
| else if(regex_match(line, error_parser)) |
| { |
| std::cerr << "Error: Unable to process line: " << line << std::endl; |
| } |
| else if(regex_match(line, what, entry_parser)) |
| { |
| try{ |
| // what[1] is the Index entry |
| // what[2] is the regex to search for (optional) |
| // what[3] is a section id that must be matched |
| // in order for the term to be indexed (optional) |
| // what[4] is the index category to place the term in (optional). |
| index_info info; |
| info.term = escape_to_xml(unquote(what.str(1))); |
| std::string s = unquote(what.str(2)); |
| if(s.size()) |
| info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl); |
| else |
| info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl); |
| |
| s = unquote(what.str(3)); |
| if(s.size()) |
| info.search_id = s; |
| if(what[4].matched) |
| info.category = unquote(what.str(4)); |
| std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info); |
| if(pos.second) |
| { |
| if(pos.first->search_text != info.search_text) |
| { |
| // |
| // Merge the search terms: |
| // |
| const_cast<boost::regex&>(pos.first->search_text) = |
| "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")"; |
| } |
| if(pos.first->search_id != info.search_id) |
| { |
| // |
| // Merge the ID constraints: |
| // |
| const_cast<boost::regex&>(pos.first->search_id) = |
| "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")"; |
| } |
| } |
| } |
| catch(const boost::regex_error&) |
| { |
| std::cerr << "Unable to process regular expression in script line:\n \"" |
| << line << "\"" << std::endl; |
| throw; |
| } |
| catch(const std::exception&) |
| { |
| std::cerr << "Unable to process script line:\n \"" |
| << line << "\"" << std::endl; |
| throw; |
| } |
| } |
| else |
| { |
| std::cerr << "Error: Unable to process line: " << line << std::endl; |
| } |
| } |
| } |
| |