#include "git-compat-util.h" #include "diffcore.h" /* * Idea here is very simple. * * Almost all data we are interested in are text, but sometimes we have * to deal with binary data. So we cut them into chunks delimited by * LF byte, or 64-byte sequence, whichever comes first, and hash them. * * For those chunks, if the source buffer has more instances of it * than the destination buffer, that means the difference are the * number of bytes not copied from source to destination. If the * counts are the same, everything was copied from source to * destination. If the destination has more, everything was copied, * and destination added more. * * We are doing an approximation so we do not really have to waste * memory by actually storing the sequence. We just hash them into * somewhere around 2^16 hashbuckets and count the occurrences. */ /* Wild guess at the initial hash size */ #define INITIAL_HASH_SIZE … /* We leave more room in smaller hash but do not let it * grow to have unused hole too much. */ #define INITIAL_FREE(sz_log2) … /* A prime rather carefully chosen between 2^16..2^17, so that * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable * size under the current 2<<17 maximum, which can hold this many * different values before overflowing to hashtable of size 2<<18. */ #define HASHBASE … struct spanhash { … }; struct spanhash_top { … }; static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig) { … } static struct spanhash_top *add_spanhash(struct spanhash_top *top, unsigned int hashval, int cnt) { … } static int spanhash_cmp(const void *a_, const void *b_) { … } static struct spanhash_top *hash_chars(struct repository *r, struct diff_filespec *one) { … } int diffcore_count_changes(struct repository *r, struct diff_filespec *src, struct diff_filespec *dst, void **src_count_p, void **dst_count_p, unsigned long *src_copied, unsigned long *literal_added) { … }