// boost/libs/sort/example/generalizedstruct.cpp - nest-cam/4320010/boost - Git at Google

 // This example shows how to sort structs using complex multiple part keys using
 // string_sort.
 //
 //  Copyright Steven Ross 2009-2014.
 //
 // Distributed under the Boost Software License, Version 1.0.
 //    (See accompanying file LICENSE_1_0.txt or copy at
 //          http://www.boost.org/LICENSE_1_0.txt)

 //  See http://www.boost.org/libs/sort for library home page.

 #include <boost/sort/spreadsort/string_sort.hpp>
 #include <boost/sort/spreadsort/float_sort.hpp>
 #include <time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <algorithm>
 #include <vector>
 #include <iostream>
 #include <fstream>
 #include <string>
 using std::string;
 using namespace boost::sort::spreadsort;

 //[generalized_functors
 // One record to sort.  lessthan compares the fields in declaration order:
 // birth first, then net_worth, then first_name, then last_name.
 struct DATA_TYPE {
   time_t birth;            // Most significant part of the sort key.
   float net_worth;         // May be negative; bracket handles the sign.
   std::string first_name;
   std::string last_name;   // Least significant part of the sort key.
 };

 // Number of key bytes taken by the birth date at the front of the key.
 static const int birth_size = static_cast<int>(sizeof(time_t));
 // Key offset at which first_name starts: just past birth and net_worth.
 static const int first_name_offset =
   birth_size + static_cast<int>(sizeof(float));
 // Single-byte mask.  It is 64 bits wide so that bracket can shift it up to
 // the top byte of a time_t of up to 8 bytes.
 static const boost::uint64_t base_mask = 0xff;

 // Comparison functor for DATA_TYPE.  Fields are compared in order of
 // significance: birth, net_worth, first_name, and finally last_name.
 struct lessthan {
   inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
     // The first field that differs decides the result.
     if (x.birth != y.birth)
       return x.birth < y.birth;
     if (x.net_worth != y.net_worth)
       return x.net_worth < y.net_worth;

     // One three-way comparison answers both "different?" and "less?".
     const int first_name_order = x.first_name.compare(y.first_name);
     if (first_name_order != 0)
       return first_name_order < 0;

     // Everything else is equal, so last_name is the tie-breaker.
     return x.last_name.compare(y.last_name) < 0;
   }
 };

 // Key-byte functor for string_sort: returns the byte at position `offset` of
 // a variable-length key built from the fields of `x`, such that comparing
 // keys bytewise gives the same order as lessthan.
 //
 // Key layout, most significant byte first:
 //   [0, birth_size)                  birth, high byte first, sign bit flipped
 //   [birth_size, first_name_offset)  net_worth, remapped so that bytewise
 //                                    order matches numeric order
 //   first_name                       two bytes per character: a 1 marker,
 //                                    then the character itself
 //   one 0 byte                       marks the end of first_name
 //   last_name                        one byte per character
 //
 // NaN net_worth values are not handled here; main replaces them with 0
 // before sorting.
 struct bracket {
   inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
     // Sort date as a signed int, returning the appropriate byte.
     if (offset < birth_size) {
       const int bit_shift = 8 * (birth_size - offset - 1);
       unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
       // Handling the sign bit.  Unnecessary if the data is always positive.
       if (offset == 0) {
         return result ^ 128;
       }

       return result;
     }

     // Sort a signed float.  This requires reversing the order of negatives
     // because of the way floats are represented in bits.
     if (offset < first_name_offset) {
       const int bit_shift = 8 * (first_name_offset - offset - 1);
       // lessthan treats -0.0f and +0.0f as equal, so both must produce the
       // same key bytes.  Left alone, -0.0f (bits 0x80000000) is not < 0, so
       // it would take the positive path and its top byte 128 + 0x80 would
       // wrap to 0, placing it below every negative value.
       const float worth = (x.net_worth == 0) ? 0.0f : x.net_worth;
       unsigned key = float_mem_cast<float, unsigned>(worth);
       unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
       // Handling the sign.
       if (worth < 0) {
         return 255 - result;
       }
       // Increasing positives so they are higher than negatives.
       if (offset == birth_size) {
         return 128 + result;
       }

       return result;
     }

     // Sort a string that is before the end.  This approach supports embedded
     // nulls.  If embedded nulls are not required, then just delete the "* 2"
     // and the inside of the following if just becomes:
     // return x.first_name[offset - first_name_offset];
     // size_t (not unsigned) so a very long name cannot truncate the offset.
     const size_t first_name_end_offset =
       first_name_offset + x.first_name.size() * 2;
     if (offset < first_name_end_offset) {
       const size_t char_offset = offset - first_name_offset;
       // This signals that the string continues.
       if (!(char_offset & 1)) {
         return 1;
       }
       return x.first_name[char_offset >> 1];
     }

     // This signals that the string has ended, so that shorter strings come
     // before longer ones.
     if (offset == first_name_end_offset) {
       return 0;
     }

     // The final string needs no special consideration.
     return x.last_name[offset - first_name_end_offset - 1];
   }
 };

 struct getsize {
   inline size_t operator()(const DATA_TYPE &x) const {
     return first_name_offset + x.first_name.size() * 2 + 1 +
       x.last_name.size();
   }
 };
 //] [/generalized_functors]

 //Pass in an argument to test std::sort
 int main(int argc, const char ** argv) {
   std::ifstream indata;
   std::ofstream outfile;
   bool stdSort = false;
   unsigned loopCount = 1;
   for (int u = 1; u < argc; ++u) {
     if (std::string(argv[u]) == "-std")
       stdSort = true;
     else
       loopCount = atoi(argv[u]);
   }
   double total = 0.0;
   //Run multiple loops, if requested
   std::vector<DATA_TYPE> array;
   for (unsigned u = 0; u < loopCount; ++u) {
     indata.open("input.txt", std::ios_base::in | std::ios_base::binary);
     if (indata.bad()) {
       printf("input.txt could not be opened\n");
       return 1;
     }

     // Read in the data.
     DATA_TYPE inval;
     while (!indata.eof() ) {
       indata >> inval.first_name;
       indata >> inval.last_name;
       indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
       indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
       // Handling nan.
       if (inval.net_worth != inval.net_worth) {
         inval.net_worth = 0;
       }
       if (indata.eof())
         break;
       array.push_back(inval);
     }
     indata.close();

     // Sort the data.
     clock_t start, end;
     double elapsed;
     start = clock();
     if (stdSort) {
       std::sort(array.begin(), array.end(), lessthan());
     } else {
 //[generalized_functors_call
       string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
 //] [/generalized_functors_call]
     }
     end = clock();
     elapsed = static_cast<double>(end - start);
     if (stdSort) {
       outfile.open("standard_sort_out.txt", std::ios_base::out |
                    std::ios_base::binary | std::ios_base::trunc);
     } else {
       outfile.open("boost_sort_out.txt", std::ios_base::out |
                    std::ios_base::binary | std::ios_base::trunc);
     }
     if (outfile.good()) {
       for (unsigned u = 0; u < array.size(); ++u)
         outfile << array[u].birth << " " << array[u].net_worth << " "
                 << array[u].first_name << " " << array[u].last_name << "\n";
       outfile.close();
     }
     total += elapsed;
     array.clear();
   }
   if (stdSort) {
     printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
   } else {
     printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
   }
   return 0;
 }
	// This example shows how to sort structs using complex multiple part keys using
	// string_sort.
	//
	// Copyright Steven Ross 2009-2014.
	//
	// Distributed under the Boost Software License, Version 1.0.
	// (See accompanying file LICENSE_1_0.txt or copy at
	// http://www.boost.org/LICENSE_1_0.txt)

	// See http://www.boost.org/libs/sort for library home page.

	#include <boost/sort/spreadsort/string_sort.hpp>
	#include <boost/sort/spreadsort/float_sort.hpp>
	#include <time.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <algorithm>
	#include <vector>
	#include <iostream>
	#include <fstream>
	#include <string>
	using std::string;
	using namespace boost::sort::spreadsort;

	//[generalized_functors
	// One record to sort.  lessthan compares the fields in declaration order:
	// birth first, then net_worth, then first_name, then last_name.
	struct DATA_TYPE {
	  time_t birth;            // Most significant part of the sort key.
	  float net_worth;         // May be negative; bracket handles the sign.
	  std::string first_name;
	  std::string last_name;   // Least significant part of the sort key.
	};

	// Number of key bytes taken by the birth date at the front of the key.
	static const int birth_size = static_cast<int>(sizeof(time_t));
	// Key offset at which first_name starts: just past birth and net_worth.
	static const int first_name_offset =
	  birth_size + static_cast<int>(sizeof(float));
	// Single-byte mask.  It is 64 bits wide so that bracket can shift it up to
	// the top byte of a time_t of up to 8 bytes.
	static const boost::uint64_t base_mask = 0xff;

	// Comparison functor for DATA_TYPE.  Fields are compared in order of
	// significance: birth, net_worth, first_name, and finally last_name.
	struct lessthan {
	  inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
	    // The first field that differs decides the result.
	    if (x.birth != y.birth)
	      return x.birth < y.birth;
	    if (x.net_worth != y.net_worth)
	      return x.net_worth < y.net_worth;

	    // One three-way comparison answers both "different?" and "less?".
	    const int first_name_order = x.first_name.compare(y.first_name);
	    if (first_name_order != 0)
	      return first_name_order < 0;

	    // Everything else is equal, so last_name is the tie-breaker.
	    return x.last_name.compare(y.last_name) < 0;
	  }
	};

	// Key-byte functor for string_sort: returns the byte at position `offset` of
	// a variable-length key built from the fields of `x`, such that comparing
	// keys bytewise gives the same order as lessthan.
	//
	// Key layout, most significant byte first:
	//   [0, birth_size)                  birth, high byte first, sign bit flipped
	//   [birth_size, first_name_offset)  net_worth, remapped so that bytewise
	//                                    order matches numeric order
	//   first_name                       two bytes per character: a 1 marker,
	//                                    then the character itself
	//   one 0 byte                       marks the end of first_name
	//   last_name                        one byte per character
	//
	// NaN net_worth values are not handled here; main replaces them with 0
	// before sorting.
	struct bracket {
	  inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
	    // Sort date as a signed int, returning the appropriate byte.
	    if (offset < birth_size) {
	      const int bit_shift = 8 * (birth_size - offset - 1);
	      unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
	      // Handling the sign bit. Unnecessary if the data is always positive.
	      if (offset == 0) {
	        return result ^ 128;
	      }

	      return result;
	    }

	    // Sort a signed float. This requires reversing the order of negatives
	    // because of the way floats are represented in bits.
	    if (offset < first_name_offset) {
	      const int bit_shift = 8 * (first_name_offset - offset - 1);
	      // lessthan treats -0.0f and +0.0f as equal, so both must produce the
	      // same key bytes.  Left alone, -0.0f (bits 0x80000000) is not < 0, so
	      // it would take the positive path and its top byte 128 + 0x80 would
	      // wrap to 0, placing it below every negative value.
	      const float worth = (x.net_worth == 0) ? 0.0f : x.net_worth;
	      unsigned key = float_mem_cast<float, unsigned>(worth);
	      unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
	      // Handling the sign.
	      if (worth < 0) {
	        return 255 - result;
	      }
	      // Increasing positives so they are higher than negatives.
	      if (offset == birth_size) {
	        return 128 + result;
	      }

	      return result;
	    }

	    // Sort a string that is before the end. This approach supports embedded
	    // nulls. If embedded nulls are not required, then just delete the "* 2"
	    // and the inside of the following if just becomes:
	    // return x.first_name[offset - first_name_offset];
	    // size_t (not unsigned) so a very long name cannot truncate the offset.
	    const size_t first_name_end_offset =
	      first_name_offset + x.first_name.size() * 2;
	    if (offset < first_name_end_offset) {
	      const size_t char_offset = offset - first_name_offset;
	      // This signals that the string continues.
	      if (!(char_offset & 1)) {
	        return 1;
	      }
	      return x.first_name[char_offset >> 1];
	    }

	    // This signals that the string has ended, so that shorter strings come
	    // before longer ones.
	    if (offset == first_name_end_offset) {
	      return 0;
	    }

	    // The final string needs no special consideration.
	    return x.last_name[offset - first_name_end_offset - 1];
	  }
	};

	struct getsize {
	inline size_t operator()(const DATA_TYPE &x) const {
	return first_name_offset + x.first_name.size() * 2 + 1 +
	x.last_name.size();
	}
	};
	//] [/generalized_functors]

	//Pass in an argument to test std::sort
	int main(int argc, const char ** argv) {
	std::ifstream indata;
	std::ofstream outfile;
	bool stdSort = false;
	unsigned loopCount = 1;
	for (int u = 1; u < argc; ++u) {
	if (std::string(argv[u]) == "-std")
	stdSort = true;
	else
	loopCount = atoi(argv[u]);
	}
	double total = 0.0;
	//Run multiple loops, if requested
	std::vector<DATA_TYPE> array;
	for (unsigned u = 0; u < loopCount; ++u) {
	indata.open("input.txt", std::ios_base::in \| std::ios_base::binary);
	if (indata.bad()) {
	printf("input.txt could not be opened\n");
	return 1;
	}

	// Read in the data.
	DATA_TYPE inval;
	while (!indata.eof() ) {
	indata >> inval.first_name;
	indata >> inval.last_name;
	indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
	indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
	// Handling nan.
	if (inval.net_worth != inval.net_worth) {
	inval.net_worth = 0;
	}
	if (indata.eof())
	break;
	array.push_back(inval);
	}
	indata.close();

	// Sort the data.
	clock_t start, end;
	double elapsed;
	start = clock();
	if (stdSort) {
	std::sort(array.begin(), array.end(), lessthan());
	} else {
	//[generalized_functors_call
	string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
	//] [/generalized_functors_call]
	}
	end = clock();
	elapsed = static_cast<double>(end - start);
	if (stdSort) {
	outfile.open("standard_sort_out.txt", std::ios_base::out \|
	std::ios_base::binary \| std::ios_base::trunc);
	} else {
	outfile.open("boost_sort_out.txt", std::ios_base::out \|
	std::ios_base::binary \| std::ios_base::trunc);
	}
	if (outfile.good()) {
	for (unsigned u = 0; u < array.size(); ++u)
	outfile << array[u].birth << " " << array[u].net_worth << " "
	<< array[u].first_name << " " << array[u].last_name << "\n";
	outfile.close();
	}
	total += elapsed;
	array.clear();
	}
	if (stdSort) {
	printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
	} else {
	printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
	}
	return 0;
	}