/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <folly/stats/TDigest.h> #include <algorithm> #include <limits> #include <glog/logging.h> #include <folly/stats/detail/DoubleRadixSort.h> namespace folly { /* * A good biased scaling function has the following properties: * - The value of the function k(0, delta) = 0, and k(1, delta) = delta. * This is a requirement for any t-digest function. * - The limit of the derivative of the function dk/dq at 0 is inf, and at * 1 is inf. This provides bias to improve accuracy at the tails. * - For any q <= 0.5, dk/dq(q) = dk/dq(1-q). This ensures that the accuracy * of upper and lower quantiles are equivalent. * * The scaling function used here is... * k(q, d) = (IF q >= 0.5, d - d * sqrt(2 - 2q) / 2, d * sqrt(2q) / 2) * * k(0, d) = 0 * k(1, d) = d * * dk/dq = (IF q >= 0.5, d / sqrt(2-2q), d / sqrt(2q)) * limit q->1 dk/dq = inf * limit q->0 dk/dq = inf * * When plotted, the derivative function is symmetric, centered at q=0.5. * * Note that FMA has been tested here, but benchmarks have not shown it to be a * performance improvement. */ /* * q_to_k is unused but left here as a comment for completeness. * double q_to_k(double q, double d) { * if (q >= 0.5) { * return d - d * std::sqrt(0.5 - 0.5 * q); * } * return d * std::sqrt(0.5 * q); * } */ static double k_to_q(double k, double d) { … } static double clamp(double v, double lo, double hi) { … } TDigest::TDigest( std::vector<Centroid> centroids, double sum, double count, double max_val, double min_val, size_t maxSize) : … { … } // Merge unsorted values by first sorting them. Use radix sort if // possible. This implementation puts all additional memory in the // heap, so that if called from fiber context we do not smash the // stack. Otherwise it is very similar to boost::spreadsort. TDigest TDigest::merge(Range<const double*> unsortedValues) const { … } void TDigest::internalMerge( TDigest& dst, Range<const double*> sortedValues, std::vector<Centroid>& workingBuffer) const { … } TDigest TDigest::merge( sorted_equivalent_t, Range<const double*> sortedValues) const { … } void TDigest::merge( sorted_equivalent_t, Range<const double*> sortedValues, MergeWorkingBuffer& workingBuffer) { … } TDigest TDigest::merge(Range<const TDigest*> digests) { … } double TDigest::estimateQuantile(double q) const { … } double TDigest::Centroid::add(double sum, double weight) { … } } // namespace folly