read-cache.c | Explore in Territory

/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 */

#define USE_THE_REPOSITORY_VARIABLE

#include "git-compat-util.h"
#include "bulk-checkin.h"
#include "config.h"
#include "date.h"
#include "diff.h"
#include "diffcore.h"
#include "hex.h"
#include "tempfile.h"
#include "lockfile.h"
#include "cache-tree.h"
#include "refs.h"
#include "dir.h"
#include "object-file.h"
#include "object-store-ll.h"
#include "oid-array.h"
#include "tree.h"
#include "commit.h"
#include "environment.h"
#include "gettext.h"
#include "mem-pool.h"
#include "name-hash.h"
#include "object-name.h"
#include "path.h"
#include "preload-index.h"
#include "read-cache.h"
#include "repository.h"
#include "resolve-undo.h"
#include "revision.h"
#include "strbuf.h"
#include "trace2.h"
#include "varint.h"
#include "split-index.h"
#include "symlinks.h"
#include "utf8.h"
#include "fsmonitor.h"
#include "thread-utils.h"
#include "progress.h"
#include "sparse-index.h"
#include "csum-file.h"
#include "promisor-remote.h"
#include "hook.h"

/* Mask for the name length in ce_flags in the on-disk index */

#define CE_NAMEMASK …

/* Index extensions.
 *
 * The first letter should be 'A'..'Z' for extensions that are not
 * necessary for a correct operation (i.e. optimization data).
 * When new extensions are added that _need_ to be understood in
 * order to correctly interpret the index file, pick a character that
 * is outside the range, to cause the reader to abort.
 */

#define CACHE_EXT(s) …
#define CACHE_EXT_TREE …
#define CACHE_EXT_RESOLVE_UNDO …
#define CACHE_EXT_LINK …
#define CACHE_EXT_UNTRACKED …
#define CACHE_EXT_FSMONITOR …
#define CACHE_EXT_ENDOFINDEXENTRIES …
#define CACHE_EXT_INDEXENTRYOFFSETTABLE …
#define CACHE_EXT_SPARSE_DIRECTORIES …

/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
#define EXTMASK …


/*
 * This is an estimate of the pathname length in the index.  We use
 * this for V4 index files to guess the un-deltafied size of the index
 * in memory because of pathname deltafication.  This is not required
 * for V2/V3 index formats because their pathnames are not compressed.
 * If the initial amount of memory set aside is not sufficient, the
 * mem pool will allocate extra memory.
 */
#define CACHE_ENTRY_PATH_LENGTH …

enum index_search_mode { … };

static inline struct cache_entry *mem_pool__ce_alloc(struct mem_pool *mem_pool, size_t len)
{ … }

static inline struct cache_entry *mem_pool__ce_calloc(struct mem_pool *mem_pool, size_t len)
{ … }

static struct mem_pool *find_mem_pool(struct index_state *istate)
{ … }

static const char *alternate_index_output;

static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{ … }

static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{ … }

void rename_index_entry_at(struct index_state *istate, int nr, const char *new_name)
{ … }

/*
 * This only updates the "non-critical" parts of the directory
 * cache, i.e. the parts that aren't tracked by GIT and are only
 * used to validate the cache.
 */
void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, struct stat *st)
{ … }

static unsigned int st_mode_from_ce(const struct cache_entry *ce)
{ … }

int fake_lstat(const struct cache_entry *ce, struct stat *st)
{ … }

static int ce_compare_data(struct index_state *istate,
			   const struct cache_entry *ce,
			   struct stat *st)
{ … }

static int ce_compare_link(const struct cache_entry *ce, size_t expected_size)
{ … }

static int ce_compare_gitlink(const struct cache_entry *ce)
{ … }

static int ce_modified_check_fs(struct index_state *istate,
				const struct cache_entry *ce,
				struct stat *st)
{ … }

static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
{ … }

static int is_racy_stat(const struct index_state *istate,
			const struct stat_data *sd)
{ … }

int is_racy_timestamp(const struct index_state *istate,
			     const struct cache_entry *ce)
{ … }

int match_stat_data_racy(const struct index_state *istate,
			 const struct stat_data *sd, struct stat *st)
{ … }

int ie_match_stat(struct index_state *istate,
		  const struct cache_entry *ce, struct stat *st,
		  unsigned int options)
{ … }

int ie_modified(struct index_state *istate,
		const struct cache_entry *ce,
		struct stat *st, unsigned int options)
{ … }

static int cache_name_stage_compare(const char *name1, int len1, int stage1,
				    const char *name2, int len2, int stage2)
{ … }

int cmp_cache_name_compare(const void *a_, const void *b_)
{ … }

static int index_name_stage_pos(struct index_state *istate,
				const char *name, int namelen,
				int stage,
				enum index_search_mode search_mode)
{ … }

int index_name_pos(struct index_state *istate, const char *name, int namelen)
{ … }

int index_name_pos_sparse(struct index_state *istate, const char *name, int namelen)
{ … }

int index_entry_exists(struct index_state *istate, const char *name, int namelen)
{ … }

int remove_index_entry_at(struct index_state *istate, int pos)
{ … }

/*
 * Remove all cache entries marked for removal, that is where
 * CE_REMOVE is set in ce_flags.  This is much more effective than
 * calling remove_index_entry_at() for each entry to be removed.
 */
void remove_marked_cache_entries(struct index_state *istate, int invalidate)
{ … }

int remove_file_from_index(struct index_state *istate, const char *path)
{ … }

static int compare_name(struct cache_entry *ce, const char *path, int namelen)
{ … }

static int index_name_pos_also_unmerged(struct index_state *istate,
	const char *path, int namelen)
{ … }

static int different_name(struct cache_entry *ce, struct cache_entry *alias)
{ … }

/*
 * If we add a filename that aliases in the cache, we will use the
 * name that we already have - but we don't want to update the same
 * alias twice, because that implies that there were actually two
 * different files with aliasing names!
 *
 * So we use the CE_ADDED flag to verify that the alias was an old
 * one before we accept it as the name to use.
 */
static struct cache_entry *create_alias_ce(struct index_state *istate,
					   struct cache_entry *ce,
					   struct cache_entry *alias)
{ … }

void set_object_name_for_intent_to_add_entry(struct cache_entry *ce)
{ … }

int add_to_index(struct index_state *istate, const char *path, struct stat *st, int flags)
{ … }

int add_file_to_index(struct index_state *istate, const char *path, int flags)
{ … }

struct cache_entry *make_empty_cache_entry(struct index_state *istate, size_t len)
{ … }

struct cache_entry *make_empty_transient_cache_entry(size_t len,
						     struct mem_pool *ce_mem_pool)
{ … }

enum verify_path_result { … };

static enum verify_path_result verify_path_internal(const char *, unsigned);

int verify_path(const char *path, unsigned mode)
{ … }

struct cache_entry *make_cache_entry(struct index_state *istate,
				     unsigned int mode,
				     const struct object_id *oid,
				     const char *path,
				     int stage,
				     unsigned int refresh_options)
{ … }

struct cache_entry *make_transient_cache_entry(unsigned int mode,
					       const struct object_id *oid,
					       const char *path,
					       int stage,
					       struct mem_pool *ce_mem_pool)
{ … }

/*
 * Chmod an index entry with either +x or -x.
 *
 * Returns -1 if the chmod for the particular cache entry failed (if it's
 * not a regular file), -2 if an invalid flip argument is passed in, 0
 * otherwise.
 */
int chmod_index_entry(struct index_state *istate, struct cache_entry *ce,
		      char flip)
{ … }

int ce_same_name(const struct cache_entry *a, const struct cache_entry *b)
{ … }

/*
 * We fundamentally don't like some paths: we don't want
 * dot or dot-dot anywhere, and for obvious reasons don't
 * want to recurse into ".git" either.
 *
 * Also, we don't want double slashes or slashes at the
 * end that can make pathnames ambiguous.
 */
static int verify_dotfile(const char *rest, unsigned mode)
{ … }

static enum verify_path_result verify_path_internal(const char *path,
						    unsigned mode)
{ … }

/*
 * Do we have another file that has the beginning components being a
 * proper superset of the name we're trying to add?
 */
static int has_file_name(struct index_state *istate,
			 const struct cache_entry *ce, int pos, int ok_to_replace)
{ … }


/*
 * Like strcmp(), but also return the offset of the first change.
 * If strings are equal, return the length.
 */
int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
{ … }

/*
 * Do we have another file with a pathname that is a proper
 * subset of the name we're trying to add?
 *
 * That is, is there another file in the index with a path
 * that matches a sub-directory in the given entry?
 */
static int has_dir_name(struct index_state *istate,
			const struct cache_entry *ce, int pos, int ok_to_replace)
{ … }

/* We may be in a situation where we already have path/file and path
 * is being added, or we already have path and path/file is being
 * added.  Either one would result in a nonsense tree that has path
 * twice when git-write-tree tries to write it out.  Prevent it.
 *
 * If ok-to-replace is specified, we remove the conflicting entries
 * from the cache so the caller should recompute the insert position.
 * When this happens, we return non-zero.
 */
static int check_file_directory_conflict(struct index_state *istate,
					 const struct cache_entry *ce,
					 int pos, int ok_to_replace)
{ … }

static int add_index_entry_with_check(struct index_state *istate, struct cache_entry *ce, int option)
{ … }

int add_index_entry(struct index_state *istate, struct cache_entry *ce, int option)
{ … }

/*
 * "refresh" does not calculate a new sha1 file or bring the
 * cache up-to-date for mode/content changes. But what it
 * _does_ do is to "re-match" the stat information of a file
 * with the cache, so that you can refresh the cache for a
 * file that hasn't been changed but where the stat entry is
 * out of date.
 *
 * For example, you'd want to do this after doing a "git-read-tree",
 * to link up the stat cache details with the proper files.
 */
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
					     struct cache_entry *ce,
					     unsigned int options, int *err,
					     int *changed_ret,
					     int *t2_did_lstat,
					     int *t2_did_scan)
{ … }

static void show_file(const char * fmt, const char * name, int in_porcelain,
		      int * first, const char *header_msg)
{ … }

int repo_refresh_and_write_index(struct repository *repo,
				 unsigned int refresh_flags,
				 unsigned int write_flags,
				 int gentle,
				 const struct pathspec *pathspec,
				 char *seen, const char *header_msg)
{ … }


int refresh_index(struct index_state *istate, unsigned int flags,
		  const struct pathspec *pathspec,
		  char *seen, const char *header_msg)
{ … }

struct cache_entry *refresh_cache_entry(struct index_state *istate,
					struct cache_entry *ce,
					unsigned int options)
{ … }


/*****************************************************************
 * Index File I/O
 *****************************************************************/

#define INDEX_FORMAT_DEFAULT …

static unsigned int get_index_format_default(struct repository *r)
{ … }

/*
 * dev/ino/uid/gid/size are also just tracked to the low 32 bits
 * Again - this is just a (very strong in practice) heuristic that
 * the inode hasn't changed.
 *
 * We save the fields in big-endian order to allow using the
 * index file over NFS transparently.
 */
struct ondisk_cache_entry { … };

/* These are only used for v3 or lower */
#define align_padding_size(size, len) …
#define align_flex_name(STRUCT,len) …
#define ondisk_cache_entry_size(len) …
#define ondisk_data_size(flags, len) …
#define ondisk_data_size_max(len) …
#define ondisk_ce_size(ce) …

/* Allow fsck to force verification of the index checksum. */
int verify_index_checksum;

/* Allow fsck to force verification of the cache entry order. */
int verify_ce_order;

static int verify_hdr(const struct cache_header *hdr, unsigned long size)
{ … }

static int read_index_extension(struct index_state *istate,
				const char *ext, const char *data, unsigned long sz)
{ … }

/*
 * Parses the contents of the cache entry contained within the 'ondisk' buffer
 * into a new incore 'cache_entry'.
 *
 * Note that 'char *ondisk' may not be aligned to a 4-byte address interval in
 * index v4, so we cannot cast it to 'struct ondisk_cache_entry *' and access
 * its members. Instead, we use the byte offsets of members within the struct to
 * identify where 'get_be16()', 'get_be32()', and 'oidread()' (which can all
 * read from an unaligned memory buffer) should read from the 'ondisk' buffer
 * into the corresponding incore 'cache_entry' members.
 */
static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
					    unsigned int version,
					    const char *ondisk,
					    unsigned long *ent_size,
					    const struct cache_entry *previous_ce)
{ … }

static void check_ce_order(struct index_state *istate)
{ … }

static void tweak_untracked_cache(struct index_state *istate)
{ … }

static void tweak_split_index(struct index_state *istate)
{ … }

static void post_read_index_from(struct index_state *istate)
{ … }

static size_t estimate_cache_size_from_compressed(unsigned int entries)
{ … }

static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
{ … }

struct index_entry_offset
{ … };

struct index_entry_offset_table
{ … };

static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset);
static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot);

static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);

struct load_index_extensions
{ … };

static void *load_index_extensions(void *_data)
{ … }

/*
 * A helper function that will load the specified range of cache entries
 * from the memory mapped file and add them to the given index.
 */
static unsigned long load_cache_entry_block(struct index_state *istate,
			struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap,
			unsigned long start_offset, const struct cache_entry *previous_ce)
{ … }

static unsigned long load_all_cache_entries(struct index_state *istate,
			const char *mmap, size_t mmap_size, unsigned long src_offset)
{ … }

/*
 * Mostly randomly chosen maximum thread counts: we
 * cap the parallelism to online_cpus() threads, and we want
 * to have at least 10000 cache entries per thread for it to
 * be worth starting a thread.
 */

#define THREAD_COST …

struct load_cache_entries_thread_data
{ … };

/*
 * A thread proc to run the load_cache_entries() computation
 * across multiple background threads.
 */
static void *load_cache_entries_thread(void *_data)
{ … }

static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size,
						 int nr_threads, struct index_entry_offset_table *ieot)
{ … }

static void set_new_index_sparsity(struct index_state *istate)
{ … }

/* remember to discard_cache() before reading a different cache! */
int do_read_index(struct index_state *istate, const char *path, int must_exist)
{ … }

/*
 * Signal that the shared index is used by updating its mtime.
 *
 * This way, shared index files can be removed if they have not been
 * used for some time.
 */
static void freshen_shared_index(const char *shared_index, int warn)
{ … }

int read_index_from(struct index_state *istate, const char *path,
		    const char *gitdir)
{ … }

int is_index_unborn(struct index_state *istate)
{ … }

void index_state_init(struct index_state *istate, struct repository *r)
{ … }

void release_index(struct index_state *istate)
{ … }

void discard_index(struct index_state *istate)
{ … }

/*
 * Validate the cache entries of this index.
 * All cache entries associated with this index
 * should have been allocated by the memory pool
 * associated with this index, or by a referenced
 * split index.
 */
void validate_cache_entries(const struct index_state *istate)
{ … }

int unmerged_index(const struct index_state *istate)
{ … }

int repo_index_has_changes(struct repository *repo,
			   struct tree *tree,
			   struct strbuf *sb)
{ … }

static int write_index_ext_header(struct hashfile *f,
				  git_hash_ctx *eoie_f,
				  unsigned int ext,
				  unsigned int sz)
{ … }

static void ce_smudge_racily_clean_entry(struct index_state *istate,
					 struct cache_entry *ce)
{ … }

/* Copy miscellaneous fields but not the name */
static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
				       struct cache_entry *ce)
{ … }

static int ce_write_entry(struct hashfile *f, struct cache_entry *ce,
			  struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
{ … }

/*
 * This function verifies if index_state has the correct sha1 of the
 * index file.  Don't die if we have any other failure, just return 0.
 */
static int verify_index_from(const struct index_state *istate, const char *path)
{ … }

static int repo_verify_index(struct repository *repo)
{ … }

int has_racy_timestamp(struct index_state *istate)
{ … }

void repo_update_index_if_able(struct repository *repo,
			       struct lock_file *lockfile)
{ … }

static int record_eoie(void)
{ … }

static int record_ieot(void)
{ … }

enum write_extensions { … };
#define WRITE_ALL_EXTENSIONS …

/*
 * On success, `tempfile` is closed. If it is the temporary file
 * of a `struct lock_file`, we will therefore effectively perform
 * a `close_lock_file_gently()`. Since that is an implementation
 * detail of lockfiles, callers of `do_write_index()` should not
 * rely on it.
 */
static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
			  enum write_extensions write_extensions, unsigned flags)
{ … }

void set_alternate_index_output(const char *name)
{ … }

static int commit_locked_index(struct lock_file *lk)
{ … }

static int do_write_locked_index(struct index_state *istate,
				 struct lock_file *lock,
				 unsigned flags,
				 enum write_extensions write_extensions)
{ … }

static int write_split_index(struct index_state *istate,
			     struct lock_file *lock,
			     unsigned flags)
{ … }

static unsigned long get_shared_index_expire_date(void)
{ … }

static int should_delete_shared_index(const char *shared_index_path)
{ … }

static int clean_shared_index_files(const char *current_hex)
{ … }

static int write_shared_index(struct index_state *istate,
			      struct tempfile **temp, unsigned flags)
{ … }

static const int default_max_percent_split_change = …;

static int too_many_not_shared_entries(struct index_state *istate)
{ … }

int write_locked_index(struct index_state *istate, struct lock_file *lock,
		       unsigned flags)
{ … }

/*
 * Read the index file that is potentially unmerged into given
 * index_state, dropping any unmerged entries to stage #0 (potentially
 * resulting in a path appearing as both a file and a directory in the
 * index; the caller is responsible for clearing out the extra entries
 * before writing the index to a tree).  Returns true if the index is
 * unmerged.  Callers who want to refuse to work from an unmerged
 * state can call this and check its return value, instead of calling
 * read_cache().
 */
int repo_read_index_unmerged(struct repository *repo)
{ … }

/*
 * Returns 1 if the path is an "other" path with respect to
 * the index; that is, the path is not mentioned in the index at all,
 * either as a file, a directory with some files in the index,
 * or as an unmerged entry.
 *
 * We helpfully remove a trailing "/" from directories so that
 * the output of read_directory can be used as-is.
 */
int index_name_is_other(struct index_state *istate, const char *name,
			int namelen)
{ … }

void *read_blob_data_from_index(struct index_state *istate,
				const char *path, unsigned long *size)
{ … }

void move_index_extensions(struct index_state *dst, struct index_state *src)
{ … }

struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
				    struct index_state *istate)
{ … }

void discard_cache_entry(struct cache_entry *ce)
{ … }

int should_validate_cache_entries(void)
{ … }

#define EOIE_SIZE …
#define EOIE_SIZE_WITH_HEADER …

static size_t read_eoie_extension(const char *mmap, size_t mmap_size)
{ … }

static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset)
{ … }

#define IEOT_VERSION …

static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset)
{ … }

static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot)
{ … }

void prefetch_cache_entries(const struct index_state *istate,
			    must_prefetch_predicate must_prefetch)
{ … }

static int read_one_entry_opt(struct index_state *istate,
			      const struct object_id *oid,
			      struct strbuf *base,
			      const char *pathname,
			      unsigned mode, int opt)
{ … }

static int read_one_entry(const struct object_id *oid, struct strbuf *base,
			  const char *pathname, unsigned mode,
			  void *context)
{ … }

/*
 * This is used when the caller knows there are no existing entries at
 * the stage that will conflict with the entry being added.
 */
static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
				const char *pathname, unsigned mode,
				void *context)
{ … }

/*
 * Read the tree specified with the --with-tree option
 * (typically, HEAD) into stage #1 and then
 * squash its entries down to stage #0.  This is used for
 * --error-unmatch to list and check the path patterns
 * that were given from the command line.  We are not
 * going to write this index out.
 */
void overlay_tree_on_index(struct index_state *istate,
			   const char *tree_name, const char *prefix)
{ … }

struct update_callback_data { … };

static int fix_unmerged_status(struct diff_filepair *p,
			       struct update_callback_data *data)
{ … }

static void update_callback(struct diff_queue_struct *q,
			    struct diff_options *opt UNUSED, void *cbdata)
{ … }

int add_files_to_cache(struct repository *repo, const char *prefix,
		       const struct pathspec *pathspec, char *ps_matched,
		       int include_sparse, int flags)
{ … }
git/read-cache.c