blob: 925ab28e4ad0142199edb80d728635f198e4717e [file] [log] [blame]
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_HISTOGRAM_DEFS_H_
#define FOLLY_HISTOGRAM_DEFS_H_
#include <folly/Conv.h>
#include <glog/logging.h>
namespace folly {
namespace detail {
template <typename T, typename BucketT>
HistogramBuckets<T, BucketT>::HistogramBuckets(ValueType bucketSize,
ValueType min,
ValueType max,
const BucketType& defaultBucket)
: bucketSize_(bucketSize),
min_(min),
max_(max) {
CHECK_GT(bucketSize_, ValueType(0));
CHECK_LT(min_, max_);
// Deliberately make this a signed type, because we're about
// to compare it against max-min, which is nominally signed, too.
int numBuckets = (max - min) / bucketSize;
// Round up if the bucket size does not fit evenly
if (numBuckets * bucketSize < max - min) {
++numBuckets;
}
// Add 2 for the extra 'below min' and 'above max' buckets
numBuckets += 2;
buckets_.assign(numBuckets, defaultBucket);
}
template <typename T, typename BucketType>
unsigned int HistogramBuckets<T, BucketType>::getBucketIdx(
ValueType value) const {
if (value < min_) {
return 0;
} else if (value >= max_) {
return buckets_.size() - 1;
} else {
// the 1 is the below_min bucket
return ((value - min_) / bucketSize_) + 1;
}
}
template <typename T, typename BucketType>
template <typename CountFn>
const uint64_t HistogramBuckets<T, BucketType>::computeTotalCount(
CountFn countFromBucket) const {
uint64_t count = 0;
for (unsigned int n = 0; n < buckets_.size(); ++n) {
count += countFromBucket(const_cast<const BucketType&>(buckets_[n]));
}
return count;
}
template <typename T, typename BucketType>
template <typename CountFn>
unsigned int HistogramBuckets<T, BucketType>::getPercentileBucketIdx(
double pct,
CountFn countFromBucket,
double* lowPct, double* highPct) const {
CHECK_GE(pct, 0.0);
CHECK_LE(pct, 1.0);
unsigned int numBuckets = buckets_.size();
// Compute the counts in each bucket
std::vector<uint64_t> counts(numBuckets);
uint64_t totalCount = 0;
for (unsigned int n = 0; n < numBuckets; ++n) {
uint64_t bucketCount =
countFromBucket(const_cast<const BucketType&>(buckets_[n]));
counts[n] = bucketCount;
totalCount += bucketCount;
}
// If there are no elements, just return the lowest bucket.
// Note that we return bucket 1, which is the first bucket in the
// histogram range; bucket 0 is for all values below min_.
if (totalCount == 0) {
// Set lowPct and highPct both to 0.
// getPercentileEstimate() will recognize this to mean that the histogram
// is empty.
if (lowPct) {
*lowPct = 0.0;
}
if (highPct) {
*highPct = 0.0;
}
return 1;
}
// Loop through all the buckets, keeping track of each bucket's
// percentile range: [0,10], [10,17], [17,45], etc. When we find a range
// that includes our desired percentile, we return that bucket index.
double prevPct = 0.0;
double curPct = 0.0;
uint64_t curCount = 0;
unsigned int idx;
for (idx = 0; idx < numBuckets; ++idx) {
if (counts[idx] == 0) {
// skip empty buckets
continue;
}
prevPct = curPct;
curCount += counts[idx];
curPct = static_cast<double>(curCount) / totalCount;
if (pct <= curPct) {
// This is the desired bucket
break;
}
}
if (lowPct) {
*lowPct = prevPct;
}
if (highPct) {
*highPct = curPct;
}
return idx;
}
template <typename T, typename BucketType>
template <typename CountFn, typename AvgFn>
T HistogramBuckets<T, BucketType>::getPercentileEstimate(
double pct,
CountFn countFromBucket,
AvgFn avgFromBucket) const {
// Find the bucket where this percentile falls
double lowPct;
double highPct;
unsigned int bucketIdx = getPercentileBucketIdx(pct, countFromBucket,
&lowPct, &highPct);
if (lowPct == 0.0 && highPct == 0.0) {
// Invalid range -- the buckets must all be empty
// Return the default value for ValueType.
return ValueType();
}
if (lowPct == highPct) {
// Unlikely to have exact equality,
// but just return the bucket average in this case.
// We handle this here to avoid division by 0 below.
return avgFromBucket(buckets_[bucketIdx]);
}
CHECK_GE(pct, lowPct);
CHECK_LE(pct, highPct);
CHECK_LT(lowPct, highPct);
// Compute information about this bucket
ValueType avg = avgFromBucket(buckets_[bucketIdx]);
ValueType low;
ValueType high;
if (bucketIdx == 0) {
if (avg > min_) {
// This normally shouldn't happen. This bucket is only supposed to track
// values less than min_. Most likely this means that integer overflow
// occurred, and the code in avgFromBucket() returned a huge value
// instead of a small one. Just return the minimum possible value for
// now.
//
// (Note that if the counter keeps being decremented, eventually it will
// wrap and become small enough that we won't detect this any more, and
// we will return bogus information.)
LOG(ERROR) << "invalid average value in histogram minimum bucket: " <<
avg << " > " << min_ << ": possible integer overflow?";
return getBucketMin(bucketIdx);
}
// For the below-min bucket, just assume the lowest value ever seen is
// twice as far away from min_ as avg.
high = min_;
low = high - (2 * (high - avg));
// Adjust low in case it wrapped
if (low > avg) {
low = std::numeric_limits<ValueType>::min();
}
} else if (bucketIdx == buckets_.size() - 1) {
if (avg < max_) {
// Most likely this means integer overflow occurred. See the comments
// above in the minimum case.
LOG(ERROR) << "invalid average value in histogram maximum bucket: " <<
avg << " < " << max_ << ": possible integer overflow?";
return getBucketMax(bucketIdx);
}
// Similarly for the above-max bucket, assume the highest value ever seen
// is twice as far away from max_ as avg.
low = max_;
high = low + (2 * (avg - low));
// Adjust high in case it wrapped
if (high < avg) {
high = std::numeric_limits<ValueType>::max();
}
} else {
low = getBucketMin(bucketIdx);
high = getBucketMax(bucketIdx);
if (avg < low || avg > high) {
// Most likely this means an integer overflow occurred.
// See the comments above. Return the midpoint between low and high
// as a best guess, since avg is meaningless.
LOG(ERROR) << "invalid average value in histogram bucket: " <<
avg << " not in range [" << low << ", " << high <<
"]: possible integer overflow?";
return (low + high) / 2;
}
}
// Since we know the average value in this bucket, we can do slightly better
// than just assuming the data points in this bucket are uniformly
// distributed between low and high.
//
// Assume that the median value in this bucket is the same as the average
// value.
double medianPct = (lowPct + highPct) / 2.0;
if (pct < medianPct) {
// Assume that the data points lower than the median of this bucket
// are uniformly distributed between low and avg
double pctThroughSection = (pct - lowPct) / (medianPct - lowPct);
return low + ((avg - low) * pctThroughSection);
} else {
// Assume that the data points greater than the median of this bucket
// are uniformly distributed between avg and high
double pctThroughSection = (pct - medianPct) / (highPct - medianPct);
return avg + ((high - avg) * pctThroughSection);
}
}
} // detail
template <typename T>
std::string Histogram<T>::debugString() const {
std::string ret = folly::to<std::string>(
"num buckets: ", buckets_.getNumBuckets(),
", bucketSize: ", buckets_.getBucketSize(),
", min: ", buckets_.getMin(), ", max: ", buckets_.getMax(), "\n");
for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
folly::toAppend(" ", buckets_.getBucketMin(i), ": ",
buckets_.getByIndex(i).count, "\n",
&ret);
}
return ret;
}
template <typename T>
void Histogram<T>::toTSV(std::ostream& out, bool skipEmptyBuckets) const {
for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
// Do not output empty buckets in order to reduce data file size.
if (skipEmptyBuckets && getBucketByIndex(i).count == 0) {
continue;
}
const auto& bucket = getBucketByIndex(i);
out << getBucketMin(i) << '\t' << getBucketMax(i) << '\t'
<< bucket.count << '\t' << bucket.sum << '\n';
}
}
} // folly
#endif // FOLLY_HISTOGRAM_DEFS_H_