blob: 8522a5a384701675a63c26a0ffc59cbcd8269540 [file] [log] [blame]
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Higher performance (up to 10x) atomic increment using thread caching.
*
* @author Spencer Ahrens (sahrens)
*/
#ifndef FOLLY_THREADCACHEDINT_H
#define FOLLY_THREADCACHEDINT_H
#include <atomic>
#include <boost/noncopyable.hpp>
#include <folly/Likely.h>
#include <folly/ThreadLocal.h>
namespace folly {
// Note that readFull requires holding a lock and iterating through all of the
// thread local objects with the same Tag, so if you have a lot of
// ThreadCachedInt's you should considering breaking up the Tag space even
// further.
template <class IntT, class Tag=IntT>
class ThreadCachedInt : boost::noncopyable {
struct IntCache;
public:
explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
: target_(initialVal), cacheSize_(cacheSize) {
}
void increment(IntT inc) {
auto cache = cache_.get();
if (UNLIKELY(cache == nullptr || cache->parent_ == nullptr)) {
cache = new IntCache(*this);
cache_.reset(cache);
}
cache->increment(inc);
}
// Quickly grabs the current value which may not include some cached
// increments.
IntT readFast() const {
return target_.load(std::memory_order_relaxed);
}
// Reads the current value plus all the cached increments. Requires grabbing
// a lock, so this is significantly slower than readFast().
IntT readFull() const {
IntT ret = readFast();
for (const auto& cache : cache_.accessAllThreads()) {
if (!cache.reset_.load(std::memory_order_acquire)) {
ret += cache.val_.load(std::memory_order_relaxed);
}
}
return ret;
}
// Quickly reads and resets current value (doesn't reset cached increments).
IntT readFastAndReset() {
return target_.exchange(0, std::memory_order_release);
}
// This function is designed for accumulating into another counter, where you
// only want to count each increment once. It can still get the count a
// little off, however, but it should be much better than calling readFull()
// and set(0) sequentially.
IntT readFullAndReset() {
IntT ret = readFastAndReset();
for (auto& cache : cache_.accessAllThreads()) {
if (!cache.reset_.load(std::memory_order_acquire)) {
ret += cache.val_.load(std::memory_order_relaxed);
cache.reset_.store(true, std::memory_order_release);
}
}
return ret;
}
void setCacheSize(uint32_t newSize) {
cacheSize_.store(newSize, std::memory_order_release);
}
uint32_t getCacheSize() const {
return cacheSize_.load();
}
ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
// pre-increment (we don't support post-increment)
ThreadCachedInt& operator++() { increment(1); return *this; }
ThreadCachedInt& operator--() { increment(-1); return *this; }
// Thread-safe set function.
// This is a best effort implementation. In some edge cases, there could be
// data loss (missing counts)
void set(IntT newVal) {
for (auto& cache : cache_.accessAllThreads()) {
cache.reset_.store(true, std::memory_order_release);
}
target_.store(newVal, std::memory_order_release);
}
// This is a little tricky - it's possible that our IntCaches are still alive
// in another thread and will get destroyed after this destructor runs, so we
// need to make sure we signal that this parent is dead.
~ThreadCachedInt() {
for (auto& cache : cache_.accessAllThreads()) {
cache.parent_ = nullptr;
}
}
private:
std::atomic<IntT> target_;
std::atomic<uint32_t> cacheSize_;
ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering
// This should only ever be modified by one thread
struct IntCache {
ThreadCachedInt* parent_;
mutable std::atomic<IntT> val_;
mutable uint32_t numUpdates_;
std::atomic<bool> reset_;
explicit IntCache(ThreadCachedInt& parent)
: parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
void increment(IntT inc) {
if (LIKELY(!reset_.load(std::memory_order_acquire))) {
// This thread is the only writer to val_, so it's fine do do
// a relaxed load and do the addition non-atomically.
val_.store(
val_.load(std::memory_order_relaxed) + inc,
std::memory_order_release
);
} else {
val_.store(inc, std::memory_order_relaxed);
reset_.store(false, std::memory_order_release);
}
++numUpdates_;
if (UNLIKELY(numUpdates_ >
parent_->cacheSize_.load(std::memory_order_acquire))) {
flush();
}
}
void flush() const {
parent_->target_.fetch_add(val_, std::memory_order_release);
val_.store(0, std::memory_order_release);
numUpdates_ = 0;
}
~IntCache() {
if (parent_) {
flush();
}
}
};
};
}
#endif