| // Boost string_algo library example file ---------------------------------// |
| |
| // Copyright Pavol Droba 2002-2003. Use, modification and |
| // distribution is subject to the Boost Software License, Version |
| // 1.0. (See accompanying file LICENSE_1_0.txt or copy at |
| // http://www.boost.org/LICENSE_1_0.txt) |
| |
| // See http://www.boost.org for updates, documentation, and revision history. |
| |
| /* |
| RLE compression using replace framework. Goal is to compress a sequence of |
| repeating characters into 3 bytes ( repeat mark, character and repetition count ). |
| For simplification, it works only on numeric-value sequences. |
| */ |
| |
| #include <string> |
| #include <iostream> |
| #include <limits> |
| #include <boost/detail/iterator.hpp> |
| #include <boost/algorithm/string/find_format.hpp> |
| #include <boost/algorithm/string/finder.hpp> |
| |
| using namespace std; |
| using namespace boost; |
| |
| // replace mark specification, specialize for a specific element type |
| template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); }; |
| |
| // Compression ----------------------------------------------------------------------- |
| |
| |
| // compress finder -rle |
| /* |
| Find a sequence which can be compressed. It has to be at least 3-character long |
| sequence of repetitive characters |
| */ |
| struct find_compressF |
| { |
| // Construction |
| find_compressF() {} |
| |
| // Operation |
| template<typename ForwardIteratorT> |
| iterator_range<ForwardIteratorT> operator()( |
| ForwardIteratorT Begin, |
| ForwardIteratorT End ) const |
| { |
| typedef ForwardIteratorT input_iterator_type; |
| typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; |
| typedef iterator_range<input_iterator_type> result_type; |
| |
| // begin of the matching segment |
| input_iterator_type MStart=End; |
| // Repetition counter |
| value_type Cnt=0; |
| |
| // Search for a sequence of repetitive characters |
| for(input_iterator_type It=Begin; It!=End;) |
| { |
| input_iterator_type It2=It++; |
| |
| if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() ) |
| { |
| return result_type( MStart, It ); |
| } |
| |
| if ( *It==*It2 ) |
| { |
| if ( MStart==End ) |
| { |
| // Mark the start |
| MStart=It2; |
| } |
| |
| // Increate repetition counter |
| Cnt++; |
| } |
| else |
| { |
| if ( MStart!=End ) |
| { |
| if ( Cnt>2 ) |
| return result_type( MStart, It ); |
| else |
| { |
| MStart=End; |
| Cnt=0; |
| } |
| } |
| } |
| } |
| |
| return result_type( End, End ); |
| } |
| }; |
| |
| // rle compress format |
| /* |
| Transform a sequence into repeat mark, character and count |
| */ |
| template<typename SeqT> |
| struct format_compressF |
| { |
| private: |
| typedef SeqT result_type; |
| typedef typename SeqT::value_type value_type; |
| |
| public: |
| // Construction |
| format_compressF() {}; |
| |
| // Operation |
| template< typename ReplaceT > |
| result_type operator()( const ReplaceT& Replace ) const |
| { |
| SeqT r; |
| if(!Replace.empty()) |
| { |
| r.push_back( repeat_mark<value_type>() ); |
| r.push_back( *(Replace.begin()) ); |
| r.push_back( value_type( Replace.size() ) ); |
| } |
| |
| return r; |
| } |
| }; |
| |
| // Decompression ----------------------------------------------------------------------- |
| |
| |
| // find decompress-rle functor |
| /* |
| find a repetition block |
| */ |
| struct find_decompressF |
| { |
| // Construction |
| find_decompressF() {} |
| |
| // Operation |
| template<typename ForwardIteratorT> |
| iterator_range<ForwardIteratorT> operator()( |
| ForwardIteratorT Begin, |
| ForwardIteratorT End ) const |
| { |
| typedef ForwardIteratorT input_iterator_type; |
| typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; |
| typedef iterator_range<input_iterator_type> result_type; |
| |
| for(input_iterator_type It=Begin; It!=End; It++) |
| { |
| if( *It==repeat_mark<value_type>() ) |
| { |
| // Repeat mark found, extract body |
| input_iterator_type It2=It++; |
| |
| if ( It==End ) break; |
| It++; |
| if ( It==End ) break; |
| It++; |
| |
| return result_type( It2, It ); |
| } |
| } |
| |
| return result_type( End, End ); |
| } |
| }; |
| |
| // rle decompress format |
| /* |
| transform a repetition block into a sequence of characters |
| */ |
| template< typename SeqT > |
| struct format_decompressF |
| { |
| private: |
| typedef SeqT result_type; |
| typedef typename SeqT::value_type value_type; |
| |
| public: |
| // Construction |
| format_decompressF() {}; |
| |
| // Operation |
| template< typename ReplaceT > |
| result_type operator()( const ReplaceT& Replace ) const |
| { |
| SeqT r; |
| |
| if(!Replace.empty()) |
| { |
| // extract info |
| typename ReplaceT::const_iterator It=Replace.begin(); |
| |
| value_type Value=*(++It); |
| value_type Repeat=*(++It); |
| |
| for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value ); |
| } |
| |
| return r; |
| } |
| }; |
| |
| |
| int main() |
| { |
| cout << "* RLE Compression Example *" << endl << endl; |
| |
| string original("123_AA_*ZZZZZZZZZZZZZZ*34"); |
| |
| // copy compression |
| string compress=find_format_all_copy( |
| original, |
| find_compressF(), |
| format_compressF<string>() ); |
| |
| cout << "Compressed string: " << compress << endl; |
| |
| // Copy decompression |
| string decompress=find_format_all_copy( |
| compress, |
| find_decompressF(), |
| format_decompressF<string>() ); |
| |
| cout << "Decompressed string: " << decompress << endl; |
| |
| // in-place compression |
| find_format_all( |
| original, |
| find_compressF(), |
| format_compressF<string>() ); |
| |
| cout << "Compressed string: " << original << endl; |
| |
| // in-place decompression |
| find_format_all( |
| original, |
| find_decompressF(), |
| format_decompressF<string>() ); |
| |
| cout << "Decompressed string: " << original << endl; |
| |
| cout << endl; |
| |
| return 0; |
| } |