// Copyright 2023 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef HIGHWAY_HWY_ROBUST_STATISTICS_H_ #define HIGHWAY_HWY_ROBUST_STATISTICS_H_ #include <algorithm> // std::sort, std::find_if #include <limits> #include <utility> // std::pair #include <vector> #include "hwy/base.h" namespace hwy { namespace robust_statistics { // Sorts integral values in ascending order (e.g. for Mode). About 3x faster // than std::sort for input distributions with very few unique values. template <class T> void CountingSort(T* values, size_t num_values) { … } // @return i in [idx_begin, idx_begin + half_count) that minimizes // sorted[i + half_count] - sorted[i]. template <typename T> size_t MinRange(const T* const HWY_RESTRICT sorted, const size_t idx_begin, const size_t half_count) { … } // Returns an estimate of the mode by calling MinRange on successively // halved intervals. "sorted" must be in ascending order. This is the // Half Sample Mode estimator proposed by Bickel in "On a fast, robust // estimator of the mode", with complexity O(N log N). The mode is less // affected by outliers in highly-skewed distributions than the median. // The averaging operation below assumes "T" is an unsigned integer type. template <typename T> T ModeOfSorted(const T* const HWY_RESTRICT sorted, const size_t num_values) { … } // Returns the mode. Side effect: sorts "values". template <typename T> T Mode(T* values, const size_t num_values) { … } template <typename T, size_t N> T Mode(T (&values)[N]) { … } // Returns the median value. Side effect: sorts "values". template <typename T> T Median(T* values, const size_t num_values) { … } // Returns a robust measure of variability. template <typename T> T MedianAbsoluteDeviation(const T* values, const size_t num_values, const T median) { … } } // namespace robust_statistics } // namespace hwy #endif // HIGHWAY_HWY_ROBUST_STATISTICS_H_