boost_1_45_0/libs/flyweight/example/html.cpp - nest-learning-thermostat/5.1.3/boost - Git at Google

 /* Boost.Flyweight example of flyweight-based formatted text processing.
  *
  * Copyright 2006-2008 Joaquin M Lopez Munoz.
  * Distributed under the Boost Software License, Version 1.0.
  * (See accompanying file LICENSE_1_0.txt or copy at
  * http://www.boost.org/LICENSE_1_0.txt)
  *
  * See http://www.boost.org/libs/flyweight for library home page.
  */

 #include <boost/flyweight.hpp>
 #include <boost/functional/hash.hpp>
 #include <algorithm>
 #include <cctype>
 #include <cstdio>
 #include <fstream>
 #include <iostream>
 #include <iterator>
 #include <sstream>
 #include <string>
 #include <vector>

 #if defined(BOOST_NO_STDC_NAMESPACE)
 namespace std{using ::exit;using ::tolower;}
 #endif

 using namespace boost::flyweights;

 /* See the portability section of Boost.Hash at
  *   http://boost.org/doc/html/hash/portability.html
  * for an explanation of the ADL-related workarounds.
  */

 namespace boost{
 #if !defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 namespace flyweights{
 #endif

 /* We hash the various flyweight types used in the program hashing
  * a *pointer* to their contents: this is consistent as equality of
  * flyweights implies equality of references.
  */

 template<typename T>
 std::size_t hash_value(const flyweight<T>& x)
 {
   boost::hash<const T*> h;
   return h(&x.get());
 }

 #if !defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 } /* namespace flyweights */
 #endif
 } /* namespace boost */

 /* An HTML tag consists of a name and optional properties of the form
  * name1=value1 ... namen=valuen. We do not need to parse the properties
  * for the purposes of the program, hence they are all stored in
  * html_tag_data::properties in raw form.
  */

 struct html_tag_data
 {
   std::string name;       /* tag name */
   std::string properties; /* raw, unparsed text of the tag's properties */
 };

 /* Two tags compare equal when both name and raw properties match. */

 bool operator==(const html_tag_data& x,const html_tag_data& y)
 {
   if(x.name!=y.name)return false;
   return x.properties==y.properties;
 }

 #if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 namespace boost{
 #endif

 std::size_t hash_value(const html_tag_data& x)
 {
   /* fold both members into a single seed: name first, then properties */
   std::size_t seed=0;
   boost::hash_combine(seed,x.name);
   boost::hash_combine(seed,x.properties);
   return seed;
 }

 #if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 } /* namespace boost */
 #endif

 /* Flyweight wrapper over html_tag_data; this is the tag type stored in
  * parse results and in character contexts.
  */
 typedef flyweight<html_tag_data> html_tag;

 /* parse_tag is passed an iterator positioned at the first char of
  * the tag after the opening '<' and returns, if successful, a parsed tag
  * and whether it is opening (<xx>) or closing (</xx>).
  */

 enum tag_type
 {
   opening, /* <xx> */
   closing, /* </xx> */
   failure  /* no tag could be parsed */
 };

 /* Outcome of parse_tag: the kind of tag found plus the tag itself. */

 struct parse_tag_res
 {
   tag_type type; /* opening, closing or failure */
   html_tag tag;  /* built from the html_tag_data handed to the constructor */

   /* tag_ defaults to an empty html_tag_data, convenient for failures */
   parse_tag_res(tag_type type_,const html_tag_data& tag_=html_tag_data()):
     type(type_),
     tag(tag_)
   {}

   parse_tag_res(const parse_tag_res& x):
     type(x.type),
     tag(x.tag)
   {}
 };

 /* Lower-cases one char. The value is routed through unsigned char because
  * calling std::tolower with a negative argument other than EOF is undefined
  * behavior, which a plain char holding a non-ASCII byte can trigger.
  */

 inline char tag_name_char_to_lower(char ch)
 {
   return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
 }

 /* Tries to parse the tag whose text starts at first (just past the '<').
  *
  * Chars are accumulated up to the first '>' found outside double quotes.
  * The accumulated text is then split into a name (the first run of
  * non-whitespace chars, lower-cased, with the leading '/' of a closing tag
  * removed) and the raw remainder, which is stored as the properties.
  *
  * On success first is advanced past the '>' and the result has type
  * opening or closing. On failure (blank tag text, or no '>' before last)
  * first is left untouched and the result has type failure.
  */

 template<typename ForwardIterator>
 parse_tag_res parse_tag(ForwardIterator& first,ForwardIterator last)
 {
   html_tag_data  tag;
   std::string    buf;
   bool           in_quote=false;
   for(ForwardIterator it=first;it!=last;){
     char ch=*it++;
     if(ch=='>'&&!in_quote){             /* ignore '>'s if inside quotes */
       tag_type type;
       std::string::size_type
         bname=buf.find_first_not_of("\t\n\r "),
         ename=bname==std::string::npos?
           std::string::npos:
           buf.find_first_of("\t\n\r ",bname),
         bprop=ename==std::string::npos?
           std::string::npos:
           buf.find_first_not_of("\t\n\r ",ename);
       if(bname==ename){                 /* null name: both are npos */
         return parse_tag_res(failure);
       }
       else if(buf[bname]=='/'){         /* closing tag */
         type=closing;
         ++bname;
       }
       else type=opening;

       /* If ename is npos the count below wraps around to a huge value,
        * which substr clamps to the end of buf.
        */
       tag.name=buf.substr(bname,ename-bname);
       std::transform(                   /* normalize tag name to lower case */
         tag.name.begin(),tag.name.end(),tag.name.begin(),
         tag_name_char_to_lower);
       if(bprop!=std::string::npos){
         tag.properties=buf.substr(bprop);
       }
       first=it;                         /* result good, consume the chars */
       return parse_tag_res(type,tag);
     }
     else{
       if(ch=='"')in_quote=!in_quote;
       buf+=ch;
     }
   }
   return parse_tag_res(failure);        /* end reached and found no '>' */
 }

 /* A character context is just a vector containing the tags enclosing the
  * character, from the outermost level to the innermost.
  */

 /* html_context_data lists the enclosing tags, outermost first;
  * html_context is the flyweight wrapper over a whole such list.
  */
 typedef std::vector<html_tag>        html_context_data;
 typedef flyweight<html_context_data> html_context;

 /* A character is a char code plus its context.
  */

 struct character_data
 {
   char         code;    /* the char itself */
   html_context context; /* tags enclosing the char */

   /* defaults: char code 0 and a default-constructed context */
   character_data(char code_=0,html_context context_=html_context()):
     code(code_),
     context(context_)
   {}

   character_data(const character_data& x):
     code(x.code),
     context(x.context)
   {}
 };

 /* Characters are equal when both the char code and the context match. */

 bool operator==(const character_data& x,const character_data& y)
 {
   if(!(x.code==y.code))return false;
   return x.context==y.context;
 }

 #if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 namespace boost{
 #endif

 std::size_t hash_value(const character_data& x)
 {
   /* fold both members into a single seed: code first, then context */
   std::size_t seed=0;
   boost::hash_combine(seed,x.code);
   boost::hash_combine(seed,x.context);
   return seed;
 }

 #if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
 } /* namespace boost */
 #endif

 /* Flyweight wrapper over character_data; scan_html emits values of this
  * type.
  */
 typedef flyweight<character_data> character;

 /* scan_html converts HTML code into a stream of contextualized characters.
  */

 /* Walks [first,last) and writes one character to out per text char, each
  * carrying the stack of tags open at that point.
  *
  * Successfully parsed tags are consumed and only alter the context:
  * opening tags are pushed, closing tags pop back to the matching opening
  * tag (if there is one). When a '<' does not start a parseable tag the '<'
  * itself is dropped and the char after it is emitted as ordinary text.
  */

 template<typename ForwardIterator,typename OutputIterator>
 void scan_html(ForwardIterator first,ForwardIterator last,OutputIterator out)
 {
   html_context_data context;
   while(first!=last){
     if(*first=='<'){                                 /* tag found */
       ++first;
       parse_tag_res res=parse_tag(first,last);
       if(res.type==opening){                         /* add to context */
         context.push_back(res.tag);
         continue;
       }
       else if(res.type==closing){                    /* remove from context */
         /* Pop all tags from the innermost to the matching one; this takes
          * care of missing </xx>s, e.g. in <ul><li>hello</ul>.
          */

         for(html_context_data::reverse_iterator rit=context.rbegin();
             rit!=context.rend();++rit){
           if(rit->get().name==res.tag.get().name){
             context.erase(rit.base()-1,context.end());
             break;
           }
         }
         continue;
       }

       /* Parsing failed, so first still points right after the '<'. If the
        * '<' was the last char of the input there is nothing left to emit,
        * and dereferencing first below would be undefined behavior.
        */

       if(first==last)break;
     }
     *out++=character(*first++,html_context(context));
   }
 }

 /* HTML-producing utilities */

 /* Writes <name> or, when there are properties, <name properties>. */

 void print_opening_tag(std::ostream& os,const html_tag_data& x)
 {
   const bool has_properties=!x.properties.empty();
   os<<"<";
   os<<x.name;
   if(has_properties){
     os<<" ";
     os<<x.properties;
   }
   os<<">";
 }

 /* Writes </name>, except for SGML declarations (names beginning with '!'),
  * which are not closed.
  */

 void print_closing_tag(std::ostream& os,const html_tag_data& x)
 {
   if(x.name[0]=='!')return;
   os<<"</";
   os<<x.name;
   os<<">";
 }

 /* change_context takes contexts from and to with tags
  *
  *   from<- c1 ... cn fn+1 ... fm
  *   to  <- c1 ... cn tn+1 ... tk
  *
  * (that is, they share the first n tags, n might be 0), and
  * produces code closing fm ... fn+1 and opening tn+1 ... tk.
  */

 template<typename OutputIterator>
 void change_context(
   const html_context_data& from,const html_context_data& to,
   OutputIterator out)
 {
   std::ostringstream oss;
   html_context_data::const_iterator
     it0=from.begin(),
     it0_end=from.end(),
     it1=to.begin(),
     it1_end=to.end();
   for(;it0!=it0_end&&it1!=it1_end&&*it0==*it1;++it0,++it1);
   while(it0_end!=it0)print_closing_tag(oss,*--it0_end);
   while(it1!=it1_end)print_opening_tag(oss,*it1++);
   std::string str=oss.str();
   std::copy(str.begin(),str.end(),out);
 }

 /* produce_html is passed a bunch of contextualized characters and emits
  * the corresponding HTML. The algorithm is simple: tags are opened and closed
  * as a result of the context from one character to the following changing.
  */

 template<typename ForwardIterator,typename OutputIterator>
 void produce_html(ForwardIterator first,ForwardIterator last,OutputIterator out)
 {
   html_context context;
   while(first!=last){
     if(first->get().context!=context){
       change_context(context,first->get().context,out);
       context=first->get().context;
     }
     *out++=(first++)->get().code;
   }
   change_context(context,html_context(),out); /* close remaining context */
 }

 /* Without these explicit instantiations, MSVC++ 6.5/7.0 does not
  * find some friend operators in certain contexts.
  */

 /* Globals declared only for the compiler workaround; the visible code does
  * not use them anywhere else.
  */
 character dummy1;
 html_tag  dummy2;

 int main()
 {
   std::cout<<"input html file: ";
   std::string in;
   std::getline(std::cin,in);
   std::ifstream ifs(in.c_str());
   if(!ifs){
     std::cout<<"can't open "<<in<<std::endl;
     std::exit(EXIT_FAILURE);
   }
   typedef std::istreambuf_iterator<char> istrbuf_iterator;
   std::vector<char> html_source;
   std::copy(
     istrbuf_iterator(ifs),istrbuf_iterator(),
     std::back_inserter(html_source));

   /* parse the HTML */

   std::vector<character> scanned_html;
   scan_html(
     html_source.begin(),html_source.end(),std::back_inserter(scanned_html));

   /* Now that we have the text as a vector of contextualized characters,
    * we can shuffle it around and manipulate in almost any way we please.
    * For instance, the following reverses the central portion of the doc.
    */

   std::reverse(
     scanned_html.begin()+scanned_html.size()/4,
     scanned_html.begin()+3*(scanned_html.size()/4));

   /* emit the resulting HTML */

   std::cout<<"output html file: ";
   std::string out;
   std::getline(std::cin,out);
   std::ofstream ofs(out.c_str());
   if(!ofs){
     std::cout<<"can't open "<<out<<std::endl;
     std::exit(EXIT_FAILURE);
   }
   typedef std::ostreambuf_iterator<char> ostrbuf_iterator;
   produce_html(scanned_html.begin(),scanned_html.end(),ostrbuf_iterator(ofs));

   return 0;
 }
	/* Boost.Flyweight example of flyweight-based formatted text processing.
	*
	* Copyright 2006-2008 Joaquin M Lopez Munoz.
	* Distributed under the Boost Software License, Version 1.0.
	* (See accompanying file LICENSE_1_0.txt or copy at
	* http://www.boost.org/LICENSE_1_0.txt)
	*
	* See http://www.boost.org/libs/flyweight for library home page.
	*/

	#include <boost/flyweight.hpp>
	#include <boost/functional/hash.hpp>
	#include <algorithm>
	#include <cctype>
	#include <cstdio>
	#include <fstream>
	#include <iostream>
	#include <iterator>
	#include <sstream>
	#include <string>
	#include <vector>

	#if defined(BOOST_NO_STDC_NAMESPACE)
	namespace std{using ::exit;using ::tolower;}
	#endif

	using namespace boost::flyweights;

	/* See the portability section of Boost.Hash at
	* http://boost.org/doc/html/hash/portability.html
	* for an explanation of the ADL-related workarounds.
	*/

	namespace boost{
	#if !defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	namespace flyweights{
	#endif

	/* We hash the various flyweight types used in the program hashing
	* a pointer to their contents: this is consistent as equality of
	* flyweights implies equality of references.
	*/

	template<typename T>
	std::size_t hash_value(const flyweight<T>& x)
	{
	boost::hash<const T*> h;
	return h(&x.get());
	}

	#if !defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	} /* namespace flyweights */
	#endif
	} /* namespace boost */

	/* An HTML tag consists of a name and optional properties of the form
	* name1=value1 ... namen=valuen. We do not need to parse the properties
	* for the purposes of the program, hence they are all stored in
	* html_tag_data::properties in raw form.
	*/

	struct html_tag_data
	{
	  std::string name;       /* tag name */
	  std::string properties; /* raw, unparsed text of the tag's properties */
	};

	/* Two tags compare equal when both name and raw properties match. */

	bool operator==(const html_tag_data& x,const html_tag_data& y)
	{
	  if(x.name!=y.name)return false;
	  return x.properties==y.properties;
	}

	#if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	namespace boost{
	#endif

	std::size_t hash_value(const html_tag_data& x)
	{
	  /* fold both members into a single seed: name first, then properties */
	  std::size_t seed=0;
	  boost::hash_combine(seed,x.name);
	  boost::hash_combine(seed,x.properties);
	  return seed;
	}

	#if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	} /* namespace boost */
	#endif

	/* Flyweight wrapper over html_tag_data; this is the tag type stored in
	 * parse results and in character contexts.
	 */
	typedef flyweight<html_tag_data> html_tag;

	/* parse_tag is passed an iterator positioned at the first char of
	* the tag after the opening '<' and returns, if successful, a parsed tag
	* and whether it is opening (<xx>) or closing (</xx>).
	*/

	enum tag_type
	{
	  opening, /* <xx> */
	  closing, /* </xx> */
	  failure  /* no tag could be parsed */
	};

	/* Outcome of parse_tag: the kind of tag found plus the tag itself. */

	struct parse_tag_res
	{
	  tag_type type; /* opening, closing or failure */
	  html_tag tag;  /* built from the html_tag_data handed to the constructor */

	  /* tag_ defaults to an empty html_tag_data, convenient for failures */
	  parse_tag_res(tag_type type_,const html_tag_data& tag_=html_tag_data()):
	    type(type_),
	    tag(tag_)
	  {}

	  parse_tag_res(const parse_tag_res& x):
	    type(x.type),
	    tag(x.tag)
	  {}
	};

	/* Lower-cases one char. The value is routed through unsigned char because
	 * calling std::tolower with a negative argument other than EOF is undefined
	 * behavior, which a plain char holding a non-ASCII byte can trigger.
	 */

	inline char tag_name_char_to_lower(char ch)
	{
	  return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
	}

	/* Tries to parse the tag whose text starts at first (just past the '<').
	 *
	 * Chars are accumulated up to the first '>' found outside double quotes.
	 * The accumulated text is then split into a name (the first run of
	 * non-whitespace chars, lower-cased, with the leading '/' of a closing tag
	 * removed) and the raw remainder, which is stored as the properties.
	 *
	 * On success first is advanced past the '>' and the result has type
	 * opening or closing. On failure (blank tag text, or no '>' before last)
	 * first is left untouched and the result has type failure.
	 */

	template<typename ForwardIterator>
	parse_tag_res parse_tag(ForwardIterator& first,ForwardIterator last)
	{
	  html_tag_data tag;
	  std::string buf;
	  bool in_quote=false;
	  for(ForwardIterator it=first;it!=last;){
	    char ch=*it++;
	    if(ch=='>'&&!in_quote){ /* ignore '>'s if inside quotes */
	      tag_type type;
	      std::string::size_type
	        bname=buf.find_first_not_of("\t\n\r "),
	        ename=bname==std::string::npos?
	          std::string::npos:
	          buf.find_first_of("\t\n\r ",bname),
	        bprop=ename==std::string::npos?
	          std::string::npos:
	          buf.find_first_not_of("\t\n\r ",ename);
	      if(bname==ename){ /* null name: both are npos */
	        return parse_tag_res(failure);
	      }
	      else if(buf[bname]=='/'){ /* closing tag */
	        type=closing;
	        ++bname;
	      }
	      else type=opening;

	      /* If ename is npos the count below wraps around to a huge value,
	       * which substr clamps to the end of buf.
	       */
	      tag.name=buf.substr(bname,ename-bname);
	      std::transform( /* normalize tag name to lower case */
	        tag.name.begin(),tag.name.end(),tag.name.begin(),
	        tag_name_char_to_lower);
	      if(bprop!=std::string::npos){
	        tag.properties=buf.substr(bprop);
	      }
	      first=it; /* result good, consume the chars */
	      return parse_tag_res(type,tag);
	    }
	    else{
	      if(ch=='"')in_quote=!in_quote;
	      buf+=ch;
	    }
	  }
	  return parse_tag_res(failure); /* end reached and found no '>' */
	}

	/* A character context is just a vector containing the tags enclosing the
	* character, from the outermost level to the innermost.
	*/

	/* html_context_data lists the enclosing tags, outermost first;
	 * html_context is the flyweight wrapper over a whole such list.
	 */
	typedef std::vector<html_tag> html_context_data;
	typedef flyweight<html_context_data> html_context;

	/* A character is a char code plus its context.
	*/

	struct character_data
	{
	  char code;            /* the char itself */
	  html_context context; /* tags enclosing the char */

	  /* defaults: char code 0 and a default-constructed context */
	  character_data(char code_=0,html_context context_=html_context()):
	    code(code_),
	    context(context_)
	  {}

	  character_data(const character_data& x):
	    code(x.code),
	    context(x.context)
	  {}
	};

	/* Characters are equal when both the char code and the context match. */

	bool operator==(const character_data& x,const character_data& y)
	{
	  if(!(x.code==y.code))return false;
	  return x.context==y.context;
	}

	#if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	namespace boost{
	#endif

	std::size_t hash_value(const character_data& x)
	{
	  /* fold both members into a single seed: code first, then context */
	  std::size_t seed=0;
	  boost::hash_combine(seed,x.code);
	  boost::hash_combine(seed,x.context);
	  return seed;
	}

	#if defined(BOOST_NO_ARGUMENT_DEPENDENT_LOOKUP)
	} /* namespace boost */
	#endif

	/* Flyweight wrapper over character_data; scan_html emits values of this
	 * type.
	 */
	typedef flyweight<character_data> character;

	/* scan_html converts HTML code into a stream of contextualized characters.
	*/

	/* Walks [first,last) and writes one character to out per text char, each
	 * carrying the stack of tags open at that point.
	 *
	 * Successfully parsed tags are consumed and only alter the context:
	 * opening tags are pushed, closing tags pop back to the matching opening
	 * tag (if there is one). When a '<' does not start a parseable tag the '<'
	 * itself is dropped and the char after it is emitted as ordinary text.
	 */

	template<typename ForwardIterator,typename OutputIterator>
	void scan_html(ForwardIterator first,ForwardIterator last,OutputIterator out)
	{
	  html_context_data context;
	  while(first!=last){
	    if(*first=='<'){ /* tag found */
	      ++first;
	      parse_tag_res res=parse_tag(first,last);
	      if(res.type==opening){ /* add to context */
	        context.push_back(res.tag);
	        continue;
	      }
	      else if(res.type==closing){ /* remove from context */
	        /* Pop all tags from the innermost to the matching one; this takes
	         * care of missing </xx>s, e.g. in <ul><li>hello</ul>.
	         */

	        for(html_context_data::reverse_iterator rit=context.rbegin();
	            rit!=context.rend();++rit){
	          if(rit->get().name==res.tag.get().name){
	            context.erase(rit.base()-1,context.end());
	            break;
	          }
	        }
	        continue;
	      }

	      /* Parsing failed, so first still points right after the '<'. If the
	       * '<' was the last char of the input there is nothing left to emit,
	       * and dereferencing first below would be undefined behavior.
	       */

	      if(first==last)break;
	    }
	    *out++=character(*first++,html_context(context));
	  }
	}

	/* HTML-producing utilities */

	/* Writes <name> or, when there are properties, <name properties>. */

	void print_opening_tag(std::ostream& os,const html_tag_data& x)
	{
	  const bool has_properties=!x.properties.empty();
	  os<<"<";
	  os<<x.name;
	  if(has_properties){
	    os<<" ";
	    os<<x.properties;
	  }
	  os<<">";
	}

	/* Writes </name>, except for SGML declarations (names beginning with '!'),
	 * which are not closed.
	 */

	void print_closing_tag(std::ostream& os,const html_tag_data& x)
	{
	  if(x.name[0]=='!')return;
	  os<<"</";
	  os<<x.name;
	  os<<">";
	}

	/* change_context takes contexts from and to with tags
	*
	* from<- c1 ... cn fn+1 ... fm
	* to <- c1 ... cn tn+1 ... tk
	*
	* (that is, they share the first n tags, n might be 0), and
	* produces code closing fm ... fn+1 and opening tn+1 ... tk.
	*/

	/* Emits to out the closing tags for the part of from that is not shared
	 * with to (innermost first), then the opening tags for the part of to that
	 * is not shared with from (outermost first).
	 */

	template<typename OutputIterator>
	void change_context(
	  const html_context_data& from,const html_context_data& to,
	  OutputIterator out)
	{
	  std::ostringstream oss;
	  html_context_data::const_iterator
	    it0=from.begin(),
	    it0_end=from.end(),
	    it1=to.begin(),
	    it1_end=to.end();

	  /* Skip the common prefix. The tags themselves must be compared:
	   * it0 and it1 iterate over different vectors, so comparing the
	   * iterators would never identify a shared tag.
	   */

	  for(;it0!=it0_end&&it1!=it1_end&&*it0==*it1;++it0,++it1);
	  while(it0_end!=it0)print_closing_tag(oss,*--it0_end);
	  while(it1!=it1_end)print_opening_tag(oss,*it1++);
	  std::string str=oss.str();
	  std::copy(str.begin(),str.end(),out);
	}

	/* produce_html is passed a bunch of contextualized characters and emits
	* the corresponding HTML. The algorithm is simple: tags are opened and closed
	* as a result of the context from one character to the following changing.
	*/

	template<typename ForwardIterator,typename OutputIterator>
	void produce_html(ForwardIterator first,ForwardIterator last,OutputIterator out)
	{
	html_context context;
	while(first!=last){
	if(first->get().context!=context){
	change_context(context,first->get().context,out);
	context=first->get().context;
	}
	*out++=(first++)->get().code;
	}
	change_context(context,html_context(),out); /* close remaining context */
	}

	/* Without these explicit instantiations, MSVC++ 6.5/7.0 does not
	* find some friend operators in certain contexts.
	*/

	/* Globals declared only for the compiler workaround; the visible code does
	 * not use them anywhere else.
	 */
	character dummy1;
	html_tag dummy2;

	int main()
	{
	std::cout<<"input html file: ";
	std::string in;
	std::getline(std::cin,in);
	std::ifstream ifs(in.c_str());
	if(!ifs){
	std::cout<<"can't open "<<in<<std::endl;
	std::exit(EXIT_FAILURE);
	}
	typedef std::istreambuf_iterator<char> istrbuf_iterator;
	std::vector<char> html_source;
	std::copy(
	istrbuf_iterator(ifs),istrbuf_iterator(),
	std::back_inserter(html_source));

	/* parse the HTML */

	std::vector<character> scanned_html;
	scan_html(
	html_source.begin(),html_source.end(),std::back_inserter(scanned_html));

	/* Now that we have the text as a vector of contextualized characters,
	* we can shuffle it around and manipulate in almost any way we please.
	* For instance, the following reverses the central portion of the doc.
	*/

	std::reverse(
	scanned_html.begin()+scanned_html.size()/4,
	scanned_html.begin()+3*(scanned_html.size()/4));

	/* emit the resulting HTML */

	std::cout<<"output html file: ";
	std::string out;
	std::getline(std::cin,out);
	std::ofstream ofs(out.c_str());
	if(!ofs){
	std::cout<<"can't open "<<out<<std::endl;
	std::exit(EXIT_FAILURE);
	}
	typedef std::ostreambuf_iterator<char> ostrbuf_iterator;
	produce_html(scanned_html.begin(),scanned_html.end(),ostrbuf_iterator(ofs));

	return 0;
	}