/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| #include <gflags/gflags.h> |
| |
| #include <folly/Baton.h> |
| #include <folly/Benchmark.h> |
| #include <folly/futures/Future.h> |
| #include <folly/futures/Promise.h> |
| #include <folly/futures/InlineExecutor.h> |
| |
| #include <semaphore.h> |
| #include <vector> |
| |
| using namespace folly; |
| |
| namespace { |
| |
| template <class T> |
| T incr(Try<T>&& t) { |
| return t.value() + 1; |
| } |
| |
| void someThens(size_t n) { |
| auto f = makeFuture<int>(42); |
| for (size_t i = 0; i < n; i++) { |
| f = f.then(incr<int>); |
| } |
| } |
| |
| } // anonymous namespace |
| |
// Baseline: cost of creating an already-fulfilled future.
BENCHMARK(constantFuture) {
  makeFuture(42);
}

// Full promise/future round trip: create the pair, fulfil, read the value.
BENCHMARK_RELATIVE(promiseAndFuture) {
  Promise<int> p;
  Future<int> f = p.getFuture();
  p.setValue(42);
  f.value();
}

// As above, plus one then() callback attached before fulfilment.
BENCHMARK_RELATIVE(withThen) {
  Promise<int> p;
  Future<int> f = p.getFuture().then(incr<int>);
  p.setValue(42);
  f.value();
}
| |
// thens
// Measures how then() cost scales with chain length; ideally the relative
// numbers below track 1/n of oneThen.
BENCHMARK_DRAW_LINE()

BENCHMARK(oneThen) {
  someThens(1);
}

// look for >= 50% relative
BENCHMARK_RELATIVE(twoThens) {
  someThens(2);
}

// look for >= 25% relative
BENCHMARK_RELATIVE(fourThens) {
  someThens(4);
}

// look for >= 1% relative
BENCHMARK_RELATIVE(hundredThens) {
  someThens(100);
}
| |
| // Lock contention. Although in practice fulfills tend to be temporally |
| // separate from then()s, still sometimes they will be concurrent. So the |
| // higher this number is, the better. |
| BENCHMARK_DRAW_LINE() |
| |
| BENCHMARK(no_contention) { |
| std::vector<Promise<int>> promises(10000); |
| std::vector<Future<int>> futures; |
| std::thread producer, consumer; |
| |
| BENCHMARK_SUSPEND { |
| folly::Baton<> b1, b2; |
| for (auto& p : promises) |
| futures.push_back(p.getFuture()); |
| |
| consumer = std::thread([&]{ |
| b1.post(); |
| for (auto& f : futures) f.then(incr<int>); |
| }); |
| consumer.join(); |
| |
| producer = std::thread([&]{ |
| b2.post(); |
| for (auto& p : promises) p.setValue(42); |
| }); |
| |
| b1.wait(); |
| b2.wait(); |
| } |
| |
| // The only thing we are measuring is how long fulfill + callbacks take |
| producer.join(); |
| } |
| |
| BENCHMARK_RELATIVE(contention) { |
| std::vector<Promise<int>> promises(10000); |
| std::vector<Future<int>> futures; |
| std::thread producer, consumer; |
| sem_t sem; |
| sem_init(&sem, 0, 0); |
| |
| BENCHMARK_SUSPEND { |
| folly::Baton<> b1, b2; |
| for (auto& p : promises) |
| futures.push_back(p.getFuture()); |
| |
| consumer = std::thread([&]{ |
| b1.post(); |
| for (auto& f : futures) { |
| sem_wait(&sem); |
| f.then(incr<int>); |
| } |
| }); |
| |
| producer = std::thread([&]{ |
| b2.post(); |
| for (auto& p : promises) { |
| sem_post(&sem); |
| p.setValue(42); |
| } |
| }); |
| |
| b1.wait(); |
| b2.wait(); |
| } |
| |
| // The astute reader will notice that we're not *precisely* comparing apples |
| // to apples here. Well, maybe it's like comparing Granny Smith to |
| // Braeburn or something. In the serial version, we waited for the futures |
| // to be all set up, but here we are probably still doing that work |
| // (although in parallel). But even though there is more work (on the order |
| // of 2x), it is being done by two threads. Hopefully most of the difference |
| // we see is due to lock contention and not false parallelism. |
| // |
| // Be warned that if the box is under heavy load, this will greatly skew |
| // these results (scheduling overhead will begin to dwarf lock contention). |
| // I'm not sure but I'd guess in Windtunnel this will mean large variance, |
| // because I expect they load the boxes as much as they can? |
| consumer.join(); |
| producer.join(); |
| } |
| |
| BENCHMARK_DRAW_LINE(); |
| |
| // The old way. Throw an exception, and rethrow to access it upstream. |
| void throwAndCatchImpl() { |
| makeFuture() |
| .then([](Try<Unit>&&){ throw std::runtime_error("oh no"); }) |
| .then([](Try<Unit>&& t) { |
| try { |
| t.value(); |
| } catch(const std::runtime_error& e) { |
| // ... |
| return; |
| } |
| CHECK(false); |
| }); |
| } |
| |
| // Not much better. Throw an exception, and access it via the wrapper upstream. |
| // Actually a little worse due to wrapper overhead. then() won't know that the |
| // exception is a runtime_error, so will have to store it as an exception_ptr |
| // anyways. withException will therefore have to rethrow. Note that if we threw |
| // std::exception instead, we would see some wins, as that's the type then() |
| // will try to wrap, so no exception_ptrs/rethrows are necessary. |
| void throwAndCatchWrappedImpl() { |
| makeFuture() |
| .then([](Try<Unit>&&){ throw std::runtime_error("oh no"); }) |
| .then([](Try<Unit>&& t) { |
| auto caught = t.withException<std::runtime_error>( |
| [](const std::runtime_error& e){ |
| // ... |
| }); |
| CHECK(caught); |
| }); |
| } |
| |
| // Better. Wrap an exception, and rethrow to access it upstream. |
| void throwWrappedAndCatchImpl() { |
| makeFuture() |
| .then([](Try<Unit>&&){ |
| return makeFuture<Unit>(std::runtime_error("oh no")); |
| }) |
| .then([](Try<Unit>&& t) { |
| try { |
| t.value(); |
| } catch(const std::runtime_error& e) { |
| // ... |
| return; |
| } |
| CHECK(false); |
| }); |
| } |
| |
| // The new way. Wrap an exception, and access it via the wrapper upstream |
| void throwWrappedAndCatchWrappedImpl() { |
| makeFuture() |
| .then([](Try<Unit>&&){ |
| return makeFuture<Unit>(std::runtime_error("oh no")); |
| }) |
| .then([](Try<Unit>&& t){ |
| auto caught = t.withException<std::runtime_error>( |
| [](const std::runtime_error& e){ |
| // ... |
| }); |
| CHECK(caught); |
| }); |
| } |
| |
| // Simulate heavy contention on func |
| void contend(void(*func)()) { |
| folly::BenchmarkSuspender s; |
| const int N = 100; |
| const int iters = 1000; |
| pthread_barrier_t barrier; |
| pthread_barrier_init(&barrier, nullptr, N+1); |
| std::vector<std::thread> threads; |
| for (int i = 0; i < N; i++) { |
| threads.push_back(std::thread([&](){ |
| pthread_barrier_wait(&barrier); |
| for (int j = 0; j < iters; j++) { |
| func(); |
| } |
| })); |
| } |
| pthread_barrier_wait(&barrier); |
| s.dismiss(); |
| for (auto& t : threads) { |
| t.join(); |
| } |
| s.rehire(); |
| pthread_barrier_destroy(&barrier); |
| } |
| |
// Single-threaded comparison of the four error-propagation styles above.
BENCHMARK(throwAndCatch) {
  throwAndCatchImpl();
}

BENCHMARK_RELATIVE(throwAndCatchWrapped) {
  throwAndCatchWrappedImpl();
}

BENCHMARK_RELATIVE(throwWrappedAndCatch) {
  throwWrappedAndCatchImpl();
}

BENCHMARK_RELATIVE(throwWrappedAndCatchWrapped) {
  throwWrappedAndCatchWrappedImpl();
}

BENCHMARK_DRAW_LINE();

// Same four styles, but under heavy multi-threaded contention (see contend).
BENCHMARK(throwAndCatchContended) {
  contend(throwAndCatchImpl);
}

BENCHMARK_RELATIVE(throwAndCatchWrappedContended) {
  contend(throwAndCatchWrappedImpl);
}

BENCHMARK_RELATIVE(throwWrappedAndCatchContended) {
  contend(throwWrappedAndCatchImpl);
}

BENCHMARK_RELATIVE(throwWrappedAndCatchWrappedContended) {
  contend(throwWrappedAndCatchWrappedImpl);
}
| |
// Executor that runs callbacks inline on the calling thread; shared by every
// fGen() chain below so via() adds no cross-thread hop.
InlineExecutor exe;

// Build a future exercising a representative mix of chain operations:
// value-returning thens, future-returning thens, and a via() transfer.
// The promise is fulfilled with a default-constructed T before returning.
template <class T>
Future<T> fGen() {
  Promise<T> p;
  auto f = p.getFuture()
    .then([] (T&& t) {
      return std::move(t);
    })
    .then([] (T&& t) {
      return makeFuture(std::move(t));
    })
    .via(&exe)
    .then([] (T&& t) {
      return std::move(t);
    })
    .then([] (T&& t) {
      return makeFuture(std::move(t));
    });
  p.setValue(T());
  return f;
}
| |
| template <class T> |
| std::vector<Future<T>> fsGen() { |
| std::vector<Future<T>> fs; |
| for (auto i = 0; i < 10; i++) { |
| fs.push_back(fGen<T>()); |
| } |
| return fs; |
| } |
| |
// Run the composite-future combinators over fresh batches of futures.
// Results are discarded; we measure only combinator + chain overhead.
template <class T>
void complexBenchmark() {
  collect(fsGen<T>());
  collectAll(fsGen<T>());
  collectAny(fsGen<T>());
  futures::map(fsGen<T>(), [] (const T& t) {
    return t;
  });
  // map with a future-returning callback, to cover both map flavors.
  futures::map(fsGen<T>(), [] (const T& t) {
    return makeFuture(T(t));
  });
}
| |
BENCHMARK_DRAW_LINE();

// Payload of exactly S bytes, used to measure how the composite benchmark
// scales with the size of the value being passed through the chains.
template <size_t S>
struct Blob {
  char buf[S];
};

// Baseline for the Blob series: empty (Unit) payload.
BENCHMARK(complexUnit) {
  complexBenchmark<Unit>();
}
| |
// Payload-size sweep: 4 bytes up to 4 KiB, relative to complexUnit.
BENCHMARK_RELATIVE(complexBlob4) {
  complexBenchmark<Blob<4>>();
}

BENCHMARK_RELATIVE(complexBlob8) {
  complexBenchmark<Blob<8>>();
}

BENCHMARK_RELATIVE(complexBlob64) {
  complexBenchmark<Blob<64>>();
}

BENCHMARK_RELATIVE(complexBlob128) {
  complexBenchmark<Blob<128>>();
}

BENCHMARK_RELATIVE(complexBlob256) {
  complexBenchmark<Blob<256>>();
}

BENCHMARK_RELATIVE(complexBlob512) {
  complexBenchmark<Blob<512>>();
}

BENCHMARK_RELATIVE(complexBlob1024) {
  complexBenchmark<Blob<1024>>();
}

BENCHMARK_RELATIVE(complexBlob2048) {
  complexBenchmark<Blob<2048>>();
}

BENCHMARK_RELATIVE(complexBlob4096) {
  complexBenchmark<Blob<4096>>();
}
| |
// Parse gflags (benchmark filter/output flags) and run every registered
// benchmark in this file.
int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  folly::runBenchmarks();
  return 0;
}