blob: 81990d0bfd68ff5ddefd80053d45e48d85393559 [file] [log] [blame]
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_STATS_MULTILEVELTIMESERIES_H_
#define FOLLY_STATS_MULTILEVELTIMESERIES_H_
#include <chrono>
#include <string>
#include <vector>
#include <glog/logging.h>
#include <folly/stats/BucketedTimeSeries.h>
namespace folly {
/*
* This class represents a timeseries which keeps several levels of data
* granularity (similar in principle to the loads reported by the UNIX
* 'uptime' command). It uses several instances (one per level) of
* BucketedTimeSeries as the underlying storage.
*
* This can easily be used to track sums (and thus rates or averages) over
* several predetermined time periods, as well as all-time sums. For example,
* you would use to it to track query rate or response speed over the last
* 5, 15, 30, and 60 minutes.
*
* The MultiLevelTimeSeries takes a list of level durations as an input; the
* durations must be strictly increasing. Furthermore a special level can be
* provided with a duration of '0' -- this will be an "all-time" level. If
* an all-time level is provided, it MUST be the last level present.
*
* The class assumes that time advances forward -- you can't retroactively add
* values for events in the past -- the 'now' argument is provided for better
* efficiency and ease of unittesting.
*
* The class is not thread-safe -- use your own synchronization!
*/
template <typename VT, typename TT=std::chrono::seconds>
class MultiLevelTimeSeries {
public:
typedef VT ValueType;
typedef TT TimeType;
typedef folly::BucketedTimeSeries<ValueType, TimeType> Level;
/*
* Create a new MultiLevelTimeSeries.
*
* This creates a new MultiLevelTimeSeries that tracks time series data at the
* specified time durations (level). The time series data tracked at each
* level is then further divided by numBuckets for memory efficiency.
*
* The durations must be strictly increasing. Furthermore a special level can
* be provided with a duration of '0' -- this will be an "all-time" level. If
* an all-time level is provided, it MUST be the last level present.
*/
MultiLevelTimeSeries(size_t numBuckets,
size_t numLevels,
const TimeType levelDurations[]);
/*
* Return the number of buckets used to track time series at each level.
*/
size_t numBuckets() const {
// The constructor ensures that levels_ has at least one item
return levels_[0].numBuckets();
}
/*
* Return the number of levels tracked by MultiLevelTimeSeries.
*/
size_t numLevels() const { return levels_.size(); }
/*
* Get the BucketedTimeSeries backing the specified level.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
const Level& getLevel(int level) const {
CHECK(level >= 0);
CHECK_LT(level, levels_.size());
return levels_[level];
}
/*
* Get the highest granularity level that is still large enough to contain
* data going back to the specified start time.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
const Level& getLevel(TimeType start) const {
for (const auto& level : levels_) {
if (level.isAllTime()) {
return level;
}
// Note that we use duration() here rather than elapsed().
// If duration is large enough to contain the start time then this level
// is good enough, even if elapsed() indicates that no data was recorded
// before the specified start time.
if (level.getLatestTime() - level.duration() <= start) {
return level;
}
}
// We should always have an all-time level, so this is never reached.
LOG(FATAL) << "No level of timeseries covers internval"
<< " from " << start.count() << " to now";
return levels_.back();
}
/*
* Return the sum of all the data points currently tracked at this level.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
ValueType sum(int level) const {
return getLevel(level).sum();
}
/*
* Return the average (sum / count) of all the data points currently tracked
* at this level.
*
* The return type may be specified to control whether floating-point or
* integer division should be performed.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
template <typename ReturnType=double>
ReturnType avg(int level) const {
return getLevel(level).template avg<ReturnType>();
}
/*
* Return the rate (sum divided by elaspsed time) of the all data points
* currently tracked at this level.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
template <typename ReturnType=double, typename Interval=TimeType>
ValueType rate(int level) const {
return getLevel(level).template rate<ReturnType, Interval>();
}
/*
* Return the number of data points currently tracked at this level.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
int64_t count(int level) const {
return getLevel(level).count();
}
/*
* Return the count divided by the elapsed time tracked at this level.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
template <typename ReturnType=double, typename Interval=TimeType>
ReturnType countRate(int level) const {
return getLevel(level).template countRate<ReturnType, Interval>();
}
/*
* Estimate the sum of the data points that occurred in the specified time
* period at this level.
*
* The range queried is [start, end).
* That is, start is inclusive, and end is exclusive.
*
* Note that data outside of the timeseries duration will no longer be
* available for use in the estimation. Specifying a start time earlier than
* getEarliestTime() will not have much effect, since only data points after
* that point in time will be counted.
*
* Note that the value returned is an estimate, and may not be precise.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
ValueType sum(TimeType start, TimeType end) const {
return getLevel(start).sum(start, end);
}
/*
* Estimate the average value during the specified time period.
*
* The same caveats documented in the sum(TimeType start, TimeType end)
* comments apply here as well.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
template <typename ReturnType=double>
ReturnType avg(TimeType start, TimeType end) const {
return getLevel(start).template avg<ReturnType>(start, end);
}
/*
* Estimate the rate during the specified time period.
*
* The same caveats documented in the sum(TimeType start, TimeType end)
* comments apply here as well.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
template <typename ReturnType=double>
ReturnType rate(TimeType start, TimeType end) const {
return getLevel(start).template rate<ReturnType>(start, end);
}
/*
* Estimate the count during the specified time period.
*
* The same caveats documented in the sum(TimeType start, TimeType end)
* comments apply here as well.
*
* Note: you should generally call update() or flush() before accessing the
* data. Otherwise you may be reading stale data if update() or flush() has
* not been called recently.
*/
int64_t count(TimeType start, TimeType end) const {
return getLevel(start).count(start, end);
}
/*
* Adds the value 'val' at time 'now' to all levels.
*
* Data points added at the same time point is cached internally here and not
* propagated to the underlying levels until either flush() is called or when
* update from a different time comes.
*
* This function expects time to always move forwards: it cannot be used to
* add historical data points that have occurred in the past. If now is
* older than the another timestamp that has already been passed to
* addValue() or update(), now will be ignored and the latest timestamp will
* be used.
*/
void addValue(TimeType now, const ValueType& val);
/*
* Adds the value 'val' at time 'now' to all levels.
*/
void addValue(TimeType now, const ValueType& val, int64_t times);
/*
* Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
* samples.
*/
void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
/*
* Update all the levels to the specified time, doing all the necessary
* work to rotate the buckets and remove any stale data points.
*
* When reading data from the timeseries, you should make sure to manually
* call update() before accessing the data. Otherwise you may be reading
* stale data if update() has not been called recently.
*/
void update(TimeType now);
/*
* Reset all the timeseries to an empty state as if no data points have ever
* been added to it.
*/
void clear();
/*
* Flush all cached updates.
*/
void flush();
private:
std::vector<Level> levels_;
// Updates within the same time interval are cached
// They are flushed out when updates from a different time comes,
// or flush() is called.
TimeType cachedTime_;
ValueType cachedSum_;
int cachedCount_;
};
} // folly
#endif // FOLLY_STATS_MULTILEVELTIMESERIES_H_