// chromium/components/zucchini/binary_data_histogram.h

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_
#define COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>

#include "components/zucchini/buffer_view.h"

namespace zucchini {

// Flags outliers within a list of doubles by applying Chauvenet's criterion.
// The mean and standard deviation of the observations are computed first; a
// query value is then tested for whether it falls more than a fixed number of
// standard deviations (sigmas) away from that mean. The check exists to lower
// the odds of false-positive ensemble matches.
//
// NOTE(review): the class body visible here declares no members, so the
// behavior described above is not implemented at this point -- confirm
// whether the interface lives elsewhere.
class OutlierDetector {
};

// Computes a similarity score between pieces of binary data. As a heuristic,
// the input is first reduced to a histogram with 65536 bins that records how
// often each consecutive 2-byte sequence occurs. A consequence is that inputs
// shorter than 2 bytes are treated as invalid, which is acceptable for
// Zucchini's use case.
//
// NOTE(review): the class body visible here declares no members, so the
// behavior described above is not implemented at this point -- confirm
// whether the interface lives elsewhere.
class BinaryDataHistogram {
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_BINARY_DATA_HISTOGRAM_H_