faux-folly/folly/ThreadCachedInt.h - nest-cam/4320010/faux-folly - Git at Google

 /*
  * Copyright 2015 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /**
  * Higher performance (up to 10x) atomic increment using thread caching.
  *
  * @author Spencer Ahrens (sahrens)
  */

 #ifndef FOLLY_THREADCACHEDINT_H
 #define FOLLY_THREADCACHEDINT_H

 #include <atomic>

 #include <boost/noncopyable.hpp>

 #include <folly/Likely.h>
 #include <folly/ThreadLocal.h>

 namespace folly {


 // Note that readFull requires holding a lock and iterating through all of the
 // thread local objects with the same Tag, so if you have a lot of
 // ThreadCachedInt's you should considering breaking up the Tag space even
 // further.
 template <class IntT, class Tag=IntT>
 class ThreadCachedInt : boost::noncopyable {
   struct IntCache;

  public:
   explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
     : target_(initialVal), cacheSize_(cacheSize) {
   }

   void increment(IntT inc) {
     auto cache = cache_.get();
     if (UNLIKELY(cache == nullptr || cache->parent_ == nullptr)) {
       cache = new IntCache(*this);
       cache_.reset(cache);
     }
     cache->increment(inc);
   }

   // Quickly grabs the current value which may not include some cached
   // increments.
   IntT readFast() const {
     return target_.load(std::memory_order_relaxed);
   }

   // Reads the current value plus all the cached increments.  Requires grabbing
   // a lock, so this is significantly slower than readFast().
   IntT readFull() const {
     IntT ret = readFast();
     for (const auto& cache : cache_.accessAllThreads()) {
       if (!cache.reset_.load(std::memory_order_acquire)) {
         ret += cache.val_.load(std::memory_order_relaxed);
       }
     }
     return ret;
   }

   // Quickly reads and resets current value (doesn't reset cached increments).
   IntT readFastAndReset() {
     return target_.exchange(0, std::memory_order_release);
   }

   // This function is designed for accumulating into another counter, where you
   // only want to count each increment once.  It can still get the count a
   // little off, however, but it should be much better than calling readFull()
   // and set(0) sequentially.
   IntT readFullAndReset() {
     IntT ret = readFastAndReset();
     for (auto& cache : cache_.accessAllThreads()) {
       if (!cache.reset_.load(std::memory_order_acquire)) {
         ret += cache.val_.load(std::memory_order_relaxed);
         cache.reset_.store(true, std::memory_order_release);
       }
     }
     return ret;
   }

   void setCacheSize(uint32_t newSize) {
     cacheSize_.store(newSize, std::memory_order_release);
   }

   uint32_t getCacheSize() const {
     return cacheSize_.load();
   }

   ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
   ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
   // pre-increment (we don't support post-increment)
   ThreadCachedInt& operator++() { increment(1); return *this; }
   ThreadCachedInt& operator--() { increment(-1); return *this; }

   // Thread-safe set function.
   // This is a best effort implementation. In some edge cases, there could be
   // data loss (missing counts)
   void set(IntT newVal) {
     for (auto& cache : cache_.accessAllThreads()) {
       cache.reset_.store(true, std::memory_order_release);
     }
     target_.store(newVal, std::memory_order_release);
   }

   // This is a little tricky - it's possible that our IntCaches are still alive
   // in another thread and will get destroyed after this destructor runs, so we
   // need to make sure we signal that this parent is dead.
   ~ThreadCachedInt() {
     for (auto& cache : cache_.accessAllThreads()) {
       cache.parent_ = nullptr;
     }
   }

  private:
   std::atomic<IntT> target_;
   std::atomic<uint32_t> cacheSize_;
   ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering

   // This should only ever be modified by one thread
   struct IntCache {
     ThreadCachedInt* parent_;
     mutable std::atomic<IntT> val_;
     mutable uint32_t numUpdates_;
     std::atomic<bool> reset_;

     explicit IntCache(ThreadCachedInt& parent)
         : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}

     void increment(IntT inc) {
       if (LIKELY(!reset_.load(std::memory_order_acquire))) {
         // This thread is the only writer to val_, so it's fine do do
         // a relaxed load and do the addition non-atomically.
         val_.store(
           val_.load(std::memory_order_relaxed) + inc,
           std::memory_order_release
         );
       } else {
         val_.store(inc, std::memory_order_relaxed);
         reset_.store(false, std::memory_order_release);
       }
       ++numUpdates_;
       if (UNLIKELY(numUpdates_ >
                    parent_->cacheSize_.load(std::memory_order_acquire))) {
         flush();
       }
     }

     void flush() const {
       parent_->target_.fetch_add(val_, std::memory_order_release);
       val_.store(0, std::memory_order_release);
       numUpdates_ = 0;
     }

     ~IntCache() {
       if (parent_) {
         flush();
       }
     }
   };
 };

 }

 #endif
	/*
	* Copyright 2015 Facebook, Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/**
	* Higher performance (up to 10x) atomic increment using thread caching.
	*
	* @author Spencer Ahrens (sahrens)
	*/

	#ifndef FOLLY_THREADCACHEDINT_H
	#define FOLLY_THREADCACHEDINT_H

	#include <atomic>

	#include <boost/noncopyable.hpp>

	#include <folly/Likely.h>
	#include <folly/ThreadLocal.h>

	namespace folly {


	// Note that readFull requires holding a lock and iterating through all of the
	// thread local objects with the same Tag, so if you have a lot of
	// ThreadCachedInt's you should considering breaking up the Tag space even
	// further.
	template <class IntT, class Tag=IntT>
	class ThreadCachedInt : boost::noncopyable {
	struct IntCache;

	public:
	explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
	: target_(initialVal), cacheSize_(cacheSize) {
	}

	void increment(IntT inc) {
	auto cache = cache_.get();
	if (UNLIKELY(cache == nullptr \|\| cache->parent_ == nullptr)) {
	cache = new IntCache(*this);
	cache_.reset(cache);
	}
	cache->increment(inc);
	}

	// Quickly grabs the current value which may not include some cached
	// increments.
	IntT readFast() const {
	return target_.load(std::memory_order_relaxed);
	}

	// Reads the current value plus all the cached increments. Requires grabbing
	// a lock, so this is significantly slower than readFast().
	IntT readFull() const {
	IntT ret = readFast();
	for (const auto& cache : cache_.accessAllThreads()) {
	if (!cache.reset_.load(std::memory_order_acquire)) {
	ret += cache.val_.load(std::memory_order_relaxed);
	}
	}
	return ret;
	}

	// Quickly reads and resets current value (doesn't reset cached increments).
	IntT readFastAndReset() {
	return target_.exchange(0, std::memory_order_release);
	}

	// This function is designed for accumulating into another counter, where you
	// only want to count each increment once. It can still get the count a
	// little off, however, but it should be much better than calling readFull()
	// and set(0) sequentially.
	IntT readFullAndReset() {
	IntT ret = readFastAndReset();
	for (auto& cache : cache_.accessAllThreads()) {
	if (!cache.reset_.load(std::memory_order_acquire)) {
	ret += cache.val_.load(std::memory_order_relaxed);
	cache.reset_.store(true, std::memory_order_release);
	}
	}
	return ret;
	}

	void setCacheSize(uint32_t newSize) {
	cacheSize_.store(newSize, std::memory_order_release);
	}

	uint32_t getCacheSize() const {
	return cacheSize_.load();
	}

	ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
	ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
	// pre-increment (we don't support post-increment)
	ThreadCachedInt& operator++() { increment(1); return *this; }
	ThreadCachedInt& operator--() { increment(-1); return *this; }

	// Thread-safe set function.
	// This is a best effort implementation. In some edge cases, there could be
	// data loss (missing counts)
	void set(IntT newVal) {
	for (auto& cache : cache_.accessAllThreads()) {
	cache.reset_.store(true, std::memory_order_release);
	}
	target_.store(newVal, std::memory_order_release);
	}

	// This is a little tricky - it's possible that our IntCaches are still alive
	// in another thread and will get destroyed after this destructor runs, so we
	// need to make sure we signal that this parent is dead.
	~ThreadCachedInt() {
	for (auto& cache : cache_.accessAllThreads()) {
	cache.parent_ = nullptr;
	}
	}

	private:
	std::atomic<IntT> target_;
	std::atomic<uint32_t> cacheSize_;
	ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering

	// This should only ever be modified by one thread
	struct IntCache {
	ThreadCachedInt* parent_;
	mutable std::atomic<IntT> val_;
	mutable uint32_t numUpdates_;
	std::atomic<bool> reset_;

	explicit IntCache(ThreadCachedInt& parent)
	: parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}

	void increment(IntT inc) {
	if (LIKELY(!reset_.load(std::memory_order_acquire))) {
	// This thread is the only writer to val_, so it's fine do do
	// a relaxed load and do the addition non-atomically.
	val_.store(
	val_.load(std::memory_order_relaxed) + inc,
	std::memory_order_release
	);
	} else {
	val_.store(inc, std::memory_order_relaxed);
	reset_.store(false, std::memory_order_release);
	}
	++numUpdates_;
	if (UNLIKELY(numUpdates_ >
	parent_->cacheSize_.load(std::memory_order_acquire))) {
	flush();
	}
	}

	void flush() const {
	parent_->target_.fetch_add(val_, std::memory_order_release);
	val_.store(0, std::memory_order_release);
	numUpdates_ = 0;
	}

	~IntCache() {
	if (parent_) {
	flush();
	}
	}
	};
	};

	}

	#endif