git/bloom.h

#ifndef BLOOM_H
#define BLOOM_H

/*
 * Forward declarations: this header refers to these types only through
 * pointers, so their full definitions are not needed here.
 *
 * NOTE(review): uint32_t and size_t are used further down without any
 * #include visible in this header - presumably the including file is
 * expected to provide them; confirm.
 */
struct commit;
struct repository;
struct commit_graph;

/*
 * Settings shared by the Bloom filters and keys declared in this header.
 * They are passed read-only (as a const pointer) to the functions that
 * fill keys, add keys to filters, and test filters.
 *
 * NOTE(review): the member list is empty in this view. The bloom_key
 * comment in this header says the settings give the number of hashes -
 * confirm the actual fields against the full definition.
 */
struct bloom_filter_settings {};

/*
 * Bloom-related constants.
 *
 * NOTE(review): every expansion is empty in this view, so the values
 * cannot be documented here. Going by the names alone,
 * DEFAULT_BLOOM_FILTER_SETTINGS presumably initializes a
 * struct bloom_filter_settings - confirm against the real definitions.
 */
#define DEFAULT_BLOOM_MAX_CHANGES
#define DEFAULT_BLOOM_FILTER_SETTINGS
#define BITS_PER_WORD
#define BLOOMDATA_CHUNK_HEADER_SIZE

/*
 * A bloom_filter struct represents a data segment to use when testing
 * hash values. The 'len' member dictates how many entries are stored
 * in 'data'.
 *
 * NOTE(review): the 'len' and 'data' members referred to above are not
 * visible in this view; their types and who owns 'data' are to be
 * confirmed against the full definition.
 */
struct bloom_filter {};

/*
 * A bloom_key represents the k hash values for a given string. These
 * can be precomputed and stored in a bloom_key for re-use when testing
 * against a bloom_filter.
 *
 * The number of hashes is given by the Bloom filter settings and is the
 * same for all Bloom filters and keys interacting with the loaded
 * version of the commit graph file and the Bloom data chunks.
 *
 * See fill_bloom_key() and clear_bloom_key() for the functions that
 * take a writable key.
 */
struct bloom_key {};

/*
 * Load a Bloom filter from the commit-graph "g" into "filter".
 *
 * NOTE(review): going by the parameter names, "graph_pos" selects a
 * position within the graph and the int return reports whether a filter
 * was loaded. Neither the exact meaning of "graph_pos" nor the return
 * convention is visible from this header - confirm in the
 * implementation.
 */
int load_bloom_filter_from_graph(struct commit_graph *g,
				 struct bloom_filter *filter,
				 uint32_t graph_pos);

/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 *   seed - seed for the hash
 *   data - input to be hashed (not modified)
 *   len  - presumably the number of bytes of "data" to hash; confirm
 *
 * Returns the 32-bit hash value.
 */
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);

/*
 * Fill "key" for the string "data" of length "len", using "settings".
 * "data" and "settings" are only read; "key" is written.
 *
 * NOTE(review): per the bloom_key comment in this header, a key holds
 * the k hash values of a string and the number of hashes comes from the
 * settings; presumably this is the function that computes them -
 * confirm. Whether a filled key must later be passed to
 * clear_bloom_key() is not visible from this header.
 */
void fill_bloom_key(const char *data,
		    size_t len,
		    struct bloom_key *key,
		    const struct bloom_filter_settings *settings);
/*
 * Clear "key".
 *
 * NOTE(review): presumably releases or resets whatever fill_bloom_key()
 * stored in the key; not verifiable from this header - confirm.
 */
void clear_bloom_key(struct bloom_key *key);

/*
 * Add "key" to "filter" according to "settings". Only "filter" is
 * writable; the key and the settings are taken as const.
 *
 * NOTE(review): presumably marks in the filter the positions selected
 * by the key's hash values - confirm in the implementation.
 */
void add_key_to_filter(const struct bloom_key *key,
		       struct bloom_filter *filter,
		       const struct bloom_filter_settings *settings);

/*
 * Set up the Bloom filter machinery. Takes no arguments and returns
 * nothing.
 *
 * NOTE(review): what state is initialized, and whether this must be
 * called before the other functions in this header, is not visible
 * here - confirm in the implementation.
 */
void init_bloom_filters(void);
/*
 * Tear down the Bloom filter machinery. Takes no arguments and returns
 * nothing.
 *
 * NOTE(review): presumably the counterpart of init_bloom_filters();
 * what is released is not visible here - confirm in the implementation.
 */
void deinit_bloom_filters(void);

/*
 * Type of the "computed" out-parameter of get_or_compute_bloom_filter().
 *
 * NOTE(review): the enumerators are not visible in this view, so the
 * possible values and their meanings are to be confirmed against the
 * full definition.
 */
enum bloom_filter_computed {};

/*
 * Get the Bloom filter for commit "c" in repository "r", optionally
 * computing it.
 *
 * Unlike get_bloom_filter(), this is the entry point for callers who
 * wish to inspect Bloom filters with incompatible hash versions (see
 * the get_bloom_filter() comment in this header).
 *
 * NOTE(review): going by the parameter names, a non-zero
 * "compute_if_not_present" asks for the filter to be computed when none
 * is available, "settings" supplies the settings to use, and
 * "*computed" reports what happened. Whether "settings" or "computed"
 * may be NULL, and who owns the returned filter, is not visible from
 * this header - confirm in the implementation.
 */
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
						 struct commit *c,
						 int compute_if_not_present,
						 const struct bloom_filter_settings *settings,
						 enum bloom_filter_computed *computed);

/*
 * Find the Bloom filter associated with the given commit "c".
 *
 * `get_bloom_filter()` returns NULL if any of the following is true:
 *
 *   - the repository does not have a commit-graph, or
 *   - the repository disables reading from the commit-graph, or
 *   - the given commit does not have a Bloom filter computed, or
 *   - there is a Bloom filter for commit "c", but it cannot be read
 *     because the filter uses an incompatible version of murmur3.
 *
 * Otherwise, the corresponding Bloom filter is returned.
 *
 * For callers who wish to inspect Bloom filters with incompatible hash
 * versions, use get_or_compute_bloom_filter().
 */
struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c);

/*
 * Test "key" against "filter" using "settings". None of the arguments
 * is modified (all are taken as const).
 *
 * NOTE(review): the meaning of the int return (e.g. which values stand
 * for "possibly present", "definitely absent", or "no usable filter")
 * is not visible from this header - confirm in the implementation
 * before relying on a plain truth test.
 */
int bloom_filter_contains(const struct bloom_filter *filter,
			  const struct bloom_key *key,
			  const struct bloom_filter_settings *settings);

#endif