/* git/dir.c */

/*
 * This handles recursive filename detection with exclude
 * files, index knowledge etc..
 *
 * Copyright (C) Linus Torvalds, 2005-2006
 *		 Junio Hamano, 2005-2006
 */

#define USE_THE_REPOSITORY_VARIABLE

#include "git-compat-util.h"
#include "abspath.h"
#include "config.h"
#include "convert.h"
#include "dir.h"
#include "environment.h"
#include "gettext.h"
#include "name-hash.h"
#include "object-file.h"
#include "object-store-ll.h"
#include "path.h"
#include "refs.h"
#include "wildmatch.h"
#include "pathspec.h"
#include "utf8.h"
#include "varint.h"
#include "ewah/ewok.h"
#include "fsmonitor-ll.h"
#include "read-cache-ll.h"
#include "setup.h"
#include "sparse-index.h"
#include "submodule-config.h"
#include "symlinks.h"
#include "trace2.h"
#include "tree.h"
#include "hex.h"

 /*
  * The maximum size of a pattern/exclude file. If the file exceeds this size
  * we will ignore it.
  *
  * The limit is 100 MiB, expressed in bytes. The expansion is parenthesized
  * so it can be used safely inside larger expressions.
  */
#define PATTERN_MAX_FILE_SIZE (100 * 1024 * 1024)

/*
 * Tells read_directory_recursive how a file or directory should be treated.
 * Values are ordered by significance, e.g. if a directory contains both
 * excluded and untracked files, it is listed as untracked because
 * path_untracked > path_excluded.
 *
 * The numeric order is part of the contract: callers compare values to
 * keep the most significant treatment seen so far, so new enumerators
 * must be inserted at the position matching their significance.
 */
enum path_treatment {
	path_none = 0,	/* nothing of interest; least significant */
	path_recurse,	/* a directory that should be descended into */
	path_excluded,	/* matched by an exclude pattern */
	path_untracked	/* untracked; most significant */
};

/*
 * Support data structure for our opendir/readdir/closedir wrappers
 *
 * NOTE(review): no members are declared here as shown, so the wrappers
 * (open_cached_dir/read_cached_dir/close_cached_dir) have no state to work
 * with. A struct without members is also not valid ISO C (it is only
 * accepted as a compiler extension). TODO: confirm the intended fields.
 */
struct cached_dir {};

/*
 * Forward declarations for two static functions that are defined later in
 * this file but are needed by code that appears before their definitions.
 *
 * Note that the prototype of read_directory_recursive() names its path
 * arguments "path"/"len" while the definition uses "base"/"baselen";
 * parameter names in a prototype are not significant to the compiler.
 */
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
	struct index_state *istate, const char *path, int len,
	struct untracked_cache_dir *untracked,
	int check_only, int stop_at_first_file, const struct pathspec *pathspec);
static int resolve_dtype(int dtype, struct index_state *istate,
			 const char *path, int len);
/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably this wraps readdir() on "dirp" and skips the "." and ".."
 * entries, returning NULL at end of directory -- confirm.
 */
struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp)
{}

/*
 * Count the '/' characters in the NUL-terminated string "s".
 *
 * "s" must not be NULL. Returns 0 for the empty string and for strings
 * without any slash.
 */
int count_slashes(const char *s)
{
	int cnt = 0;

	for (; *s; s++) {
		if (*s == '/')
			cnt++;
	}
	return cnt;
}

/*
 * Filesystem-path comparison helpers.
 *
 * NOTE(review): every body below is empty as shown, so none of these
 * functions returns a value. The per-function descriptions are inferred
 * from the names and signatures only and must be confirmed.
 */

/* Presumably a strcmp()-style comparison of two filesystem paths -- confirm. */
int git_fspathcmp(const char *a, const char *b)
{}

/* Presumably non-zero when the two filesystem paths compare equal -- confirm. */
int fspatheq(const char *a, const char *b)
{}

/*
 * Presumably like git_fspathcmp(), but looking at no more than "count"
 * bytes -- confirm.
 */
int git_fspathncmp(const char *a, const char *b, size_t count)
{}

/*
 * Presumably reports whether "a" and "b" would name the same file on the
 * filesystem -- confirm.
 */
int paths_collide(const char *a, const char *b)
{}

/*
 * Presumably a hash of "str" that agrees with the equality used by the
 * comparison helpers above -- confirm.
 */
unsigned int fspathhash(const char *str)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably matches "string" against "pattern" according to the pathspec
 * "item", with "prefix" being a leading length that needs no wildcard
 * matching -- confirm the meaning of "prefix" and of the return value.
 */
int git_fnmatch(const struct pathspec_item *item,
		const char *pattern, const char *string,
		int prefix)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Takes explicit lengths for both the pattern and the string, so it
 * presumably works on buffers that are not NUL-terminated -- confirm.
 */
static int fnmatch_icase_mem(const char *pattern, int patternlen,
			     const char *string, int stringlen,
			     int flags)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably computes the length of the leading path shared by all
 * pathspec items (the string that common_prefix() copies) -- confirm.
 */
static size_t common_prefix_len(const struct pathspec *pathspec)
{}

/*
 * Returns a copy of the longest leading path common among all
 * pathspecs.
 *
 * NOTE(review): the body is empty as shown. Since a copy is returned,
 * the caller presumably owns and frees the result -- confirm.
 */
char *common_prefix(const struct pathspec *pathspec)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably reads the working tree into "dir", limited to the paths
 * that can match "pathspec" -- confirm what the return value means.
 */
int fill_directory(struct dir_struct *dir,
		   struct index_state *istate,
		   const struct pathspec *pathspec)
{}

/*
 * Decide whether "name" stays within "max_depth" directory levels.
 *
 * "name" is a buffer of "namelen" bytes (it need not be NUL-terminated),
 * and "depth" is the depth already accumulated before "name". Every '/'
 * found in the buffer adds one level.
 *
 * Returns 1 when the resulting depth does not exceed "max_depth",
 * 0 otherwise. The scan stops at the first slash that crosses the limit.
 */
int within_depth(const char *name, int namelen,
			int depth, int max_depth)
{
	const char *cp = name;
	const char *end = name + namelen;

	while (cp < end) {
		if (*cp++ != '/')
			continue;
		depth++;
		if (depth > max_depth)
			return 0;
	}
	return depth <= max_depth;
}

/*
 * Read the contents of the blob with the given OID into a buffer.
 * Append a trailing LF to the end if the last line doesn't have one.
 *
 * Returns:
 *    -1 when the OID is invalid or unknown or does not refer to a blob.
 *     0 when the blob is empty.
 *     1 along with { data, size } of the (possibly augmented) buffer
 *       when successful.
 *
 * Optionally updates the given oid_stat with the given OID (when valid).
 *
 * NOTE(review): the body is empty as shown, so none of the return values
 * documented above is produced and "size_out"/"data_out" are never
 * written. Who frees "*data_out" is not visible here -- confirm.
 */
static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat,
			size_t *size_out, char **data_out)
{}

/*
 * Flag bits for the "flags" argument of match_pathspec_item(),
 * do_match_pathspec() and match_pathspec_with_flags(). Each flag is a
 * distinct bit so that they can be OR-ed together and tested with '&'.
 */
#define DO_MATCH_EXCLUDE   (1<<0)
#define DO_MATCH_DIRECTORY (1<<1)
#define DO_MATCH_LEADING_PATHSPEC (1<<2)

/*
 * Does the given pathspec match the given name?  A match is found if
 *
 * (1) the pathspec string is leading directory of 'name' ("RECURSIVELY"), or
 * (2) the pathspec string has a leading part matching 'name' ("LEADING"), or
 * (3) the pathspec string is a wildcard and matches 'name' ("WILDCARD"), or
 * (4) the pathspec string is exactly the same as 'name' ("EXACT").
 *
 * Return value tells which case it was (1-4), or 0 when there is no match.
 *
 * It may be instructive to look at a small table of concrete examples
 * to understand the differences between 1, 2, and 4:
 *
 *                              Pathspecs
 *                |    a/b    |   a/b/    |   a/b/c
 *          ------+-----------+-----------+------------
 *          a/b   |  EXACT    |  EXACT[1] | LEADING[2]
 *  Names   a/b/  | RECURSIVE |   EXACT   | LEADING[2]
 *          a/b/c | RECURSIVE | RECURSIVE |   EXACT
 *
 * [1] Only if DO_MATCH_DIRECTORY is passed; otherwise, this is NOT a match.
 * [2] Only if DO_MATCH_LEADING_PATHSPEC is passed; otherwise, not a match.
 *
 * "flags" is a combination of the DO_MATCH_* bits referred to above.
 *
 * NOTE(review): the body is empty as shown, so none of the documented
 * return values is produced. The role of "prefix" is not visible here --
 * confirm.
 */
static int match_pathspec_item(struct index_state *istate,
			       const struct pathspec_item *item, int prefix,
			       const char *name, int namelen, unsigned flags)
{}

/*
 * do_match_pathspec() is meant to ONLY be called by
 * match_pathspec_with_flags(); calling it directly risks pathspecs
 * like ':!unwanted_path' being ignored.
 *
 * Given a name and a list of pathspecs, returns the nature of the
 * closest (i.e. most specific) match of the name to any of the
 * pathspecs.
 *
 * The caller typically calls this multiple times with the same
 * pathspec and seen[] array but with different name/namelen
 * (e.g. entries from the index) and is interested in seeing if and
 * how each pathspec matches all the names it calls this function
 * with.  A mark is left in the seen[] array for each pathspec element
 * indicating the closest type of match that element achieved, so if
 * seen[n] remains zero after multiple invocations, that means the nth
 * pathspec did not match any names, which could indicate that the
 * user mistyped the nth pathspec.
 *
 * NOTE(review): the body is empty as shown, so no value is returned and
 * seen[] is never written.
 */
static int do_match_pathspec(struct index_state *istate,
			     const struct pathspec *ps,
			     const char *name, int namelen,
			     int prefix, char *seen,
			     unsigned flags)
{}

/*
 * The only intended caller of do_match_pathspec(), per the comment on
 * that function.
 *
 * NOTE(review): the body is empty as shown. Presumably this is where
 * exclude pathspecs such as ':!unwanted_path' are taken into account --
 * confirm.
 */
static int match_pathspec_with_flags(struct index_state *istate,
				     const struct pathspec *ps,
				     const char *name, int namelen,
				     int prefix, char *seen, unsigned flags)
{}

/*
 * Public entry point for matching "name" against the pathspec list "ps".
 *
 * NOTE(review): the body is empty as shown. Presumably "is_dir" selects
 * the DO_MATCH_DIRECTORY behaviour before delegating to
 * match_pathspec_with_flags() -- confirm.
 */
int match_pathspec(struct index_state *istate,
		   const struct pathspec *ps,
		   const char *name, int namelen,
		   int prefix, char *seen, int is_dir)
{}

/**
 * Check if a submodule is a superset of the pathspec
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * "seen" presumably plays the same role as in match_pathspec() -- confirm.
 */
int submodule_path_match(struct index_state *istate,
			 const struct pathspec *ps,
			 const char *submodule_name,
			 char *seen)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably reports every pathspec whose entry in "ps_matched" stayed
 * zero and returns the number of such errors -- confirm.
 */
int report_path_error(const char *ps_matched,
		      const struct pathspec *pathspec)
{}

/*
 * Return the length of the "simple" part of a path match limiter.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * "Simple" presumably means the leading run of characters that are not
 * glob-special -- confirm which characters count as special.
 */
int simple_length(const char *match)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably non-zero when "string" contains no glob-special character
 * at all -- confirm.
 */
int no_wildcard(const char *string)
{}

/*
 * All four arguments are pointers to non-const data, so they can be
 * used as in/out parameters.
 *
 * NOTE(review): the body is empty as shown, so none of them is written.
 * Presumably strips pattern decorations from "*pattern", adjusts
 * "*patternlen", records what it found in "*flags" and stores the length
 * of the wildcard-free prefix in "*nowildcardlen" -- confirm.
 */
void parse_path_pattern(const char **pattern,
			   int *patternlen,
			   unsigned *flags,
			   int *nowildcardlen)
{}

/*
 * Pattern-list hashmap helpers.
 *
 * NOTE(review): every body below is empty as shown; the non-void
 * functions therefore return no value. Descriptions are inferred from
 * names and signatures only and must be confirmed.
 */

/*
 * Comparison callback taking two hashmap entries. "cmp_data" and "key"
 * are marked UNUSED, so only "a" and "b" are meant to take part in the
 * comparison.
 */
int pl_hashmap_cmp(const void *cmp_data UNUSED,
		   const struct hashmap_entry *a,
		   const struct hashmap_entry *b,
		   const void *key UNUSED)
{}

/*
 * Presumably returns a newly allocated, filtered copy of "pattern";
 * what is filtered and who frees the result is not visible -- confirm.
 */
static char *dup_and_filter_pattern(const char *pattern)
{}

/* Presumably releases all entries stored in "map" -- confirm. */
static void clear_pattern_entry_hashmap(struct hashmap *map)
{}

/*
 * Presumably records "given" in the hash sets kept inside "pl"
 * -- confirm.
 */
static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
{}

/* Presumably non-zero when "map" has an entry for "pattern" -- confirm. */
static int hashmap_contains_path(struct hashmap *map,
				 struct strbuf *pattern)
{}

/*
 * Presumably non-zero when "map" has an entry for some parent directory
 * of "path", with "buffer" used as scratch space -- confirm.
 */
int hashmap_contains_parent(struct hashmap *map,
			    const char *path,
			    struct strbuf *buffer)
{}

/*
 * Presumably parses the pattern in "string" and appends it to "pl",
 * remembering "base"/"baselen" and the source position "srcpos"
 * -- confirm.
 */
void add_pattern(const char *string, const char *base,
		 int baselen, struct pattern_list *pl, int srcpos)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned and
 * "size_out"/"data_out" are never written. Presumably looks "path" up in
 * "istate" and reads the blob of an entry marked skip-worktree, with the
 * same return convention as do_read_blob() -- confirm.
 */
static int read_skip_worktree_file_from_index(struct index_state *istate,
					      const char *path,
					      size_t *size_out, char **data_out,
					      struct oid_stat *oid_stat)
{}

/*
 * Frees memory within pl which was allocated for exclude patterns and
 * the file buffer.  Does not free pl itself.
 *
 * NOTE(review): the body is empty as shown, so nothing is freed.
 */
void clear_pattern_list(struct pattern_list *pl)
{}

/*
 * Remove trailing spaces from the NUL-terminated string "buf" in place.
 *
 * A space preceded by a backslash is treated as quoted: it is kept, and
 * it ends the run of trailing spaces that could be trimmed. Only ' ' is
 * considered; tabs and other whitespace are left alone.
 *
 * If the string ends in a lone backslash there is nothing sensible to
 * unquote, so the buffer is left untouched.
 */
static void trim_trailing_spaces(char *buf)
{
	char *first_trailing_space = NULL;
	char *p;

	for (p = buf; *p; p++) {
		if (*p == ' ') {
			/* remember where the current run of spaces starts */
			if (!first_trailing_space)
				first_trailing_space = p;
			continue;
		}
		if (*p == '\\') {
			/* skip the quoted character, whatever it is */
			p++;
			if (!*p)
				return;
		}
		/* any non-space (or quoted) character ends the run */
		first_trailing_space = NULL;
	}

	if (first_trailing_space)
		*first_trailing_space = '\0';
}

/*
 * Given a subdirectory name and "dir" of the current directory,
 * search the subdir in "dir" and return it, or create a new one if it
 * does not exist in "dir".
 *
 * If "name" has the trailing slash, it'll be excluded in the search.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
						    struct untracked_cache_dir *dir,
						    const char *name, int len)
{}

/*
 * NOTE(review): empty body as shown. Presumably marks the cached
 * .gitignore state of "dir" as stale -- confirm.
 */
static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
{}

/*
 * NOTE(review): empty body as shown. Presumably the variant of
 * do_invalidate_gitignore() that also updates bookkeeping in "uc"
 * -- confirm.
 */
static void invalidate_gitignore(struct untracked_cache *uc,
				 struct untracked_cache_dir *dir)
{}

/*
 * NOTE(review): empty body as shown. Presumably marks the cached
 * contents of "dir" as stale so they are re-read -- confirm.
 */
static void invalidate_directory(struct untracked_cache *uc,
				 struct untracked_cache_dir *dir)
{}

/*
 * Forward declaration; the definition appears after add_patterns(),
 * which is declared in between.
 */
static int add_patterns_from_buffer(char *buf, size_t size,
				    const char *base, int baselen,
				    struct pattern_list *pl);

/*
 * Flags for add_patterns(); passed in its "flags" argument.
 * Defined as a bit so that further flags can be OR-ed in later.
 */
#define PATTERN_NOFOLLOW (1<<0)

/*
 * Given a file with name "fname", read it (either from disk, or from
 * an index if 'istate' is non-null), parse it and store the
 * exclude rules in "pl".
 *
 * If "oid_stat" is not NULL, compute oid of the exclude file and fill
 * stat data from disk (only valid if add_patterns returns zero). If
 * oid_stat.valid is non-zero, "oid_stat" must contain good value as input.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
static int add_patterns(const char *fname, const char *base, int baselen,
			struct pattern_list *pl, struct index_state *istate,
			unsigned flags, struct oid_stat *oid_stat)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * "buf" is not const, so the buffer may be modified or retained;
 * presumably each line of the "size"-byte buffer is parsed as one pattern
 * and added to "pl" -- confirm, including who owns "buf" afterwards.
 */
static int add_patterns_from_buffer(char *buf, size_t size,
				    const char *base, int baselen,
				    struct pattern_list *pl)
{}

/*
 * NOTE(review): the body is empty as shown. Presumably the public
 * wrapper around add_patterns() without an oid_stat -- confirm.
 */
int add_patterns_from_file_to_list(const char *fname, const char *base,
				   int baselen, struct pattern_list *pl,
				   struct index_state *istate,
				   unsigned flags)
{}

/*
 * NOTE(review): the body is empty as shown. Presumably reads the blob
 * named by "oid" and feeds its contents to add_patterns_from_buffer()
 * -- confirm.
 */
int add_patterns_from_blob_to_list(
	struct object_id *oid,
	const char *base, int baselen,
	struct pattern_list *pl)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably appends a new, empty pattern list to the group selected by
 * "group_type" in "dir", labels it with "src" and returns it -- confirm.
 */
struct pattern_list *add_pattern_list(struct dir_struct *dir,
				      int group_type, const char *src)
{}

/*
 * Used to set up core.excludesfile and .git/info/exclude lists.
 *
 * NOTE(review): empty body as shown.
 */
static void add_patterns_from_file_1(struct dir_struct *dir, const char *fname,
				     struct oid_stat *oid_stat)
{}

/*
 * NOTE(review): empty body as shown. Presumably calls
 * add_patterns_from_file_1() without an oid_stat -- confirm.
 */
void add_patterns_from_file(struct dir_struct *dir, const char *fname)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably non-zero when the "basenamelen"-byte "basename" matches the
 * "patternlen"-byte "pattern", where "prefix" is the length of the
 * wildcard-free start of the pattern -- confirm.
 */
int match_basename(const char *basename, int basenamelen,
		   const char *pattern, int prefix, int patternlen,
		   unsigned flags)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably like match_basename(), but matches the whole "pathname"
 * relative to the directory "base"/"baselen" the pattern came from
 * -- confirm.
 */
int match_pathname(const char *pathname, int pathlen,
		   const char *base, int baselen,
		   const char *pattern, int prefix, int patternlen)
{}

/*
 * Scan the given exclude list in reverse to see whether pathname
 * should be ignored.  The first match (i.e. the last on the list), if
 * any, determines the fate.  Returns the exclude_list element which
 * matched, or NULL for undecided.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * "dtype" is a pointer, so it may be updated by the lookup -- confirm.
 */
static struct path_pattern *last_matching_pattern_from_list(const char *pathname,
						       int pathlen,
						       const char *basename,
						       int *dtype,
						       struct pattern_list *pl,
						       struct index_state *istate)
{}

/*
 * Scan the list of patterns to determine if the ordered list
 * of patterns matches on 'pathname'.
 *
 * Return 1 for a match, 0 for not matched and -1 for undecided.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * The return type is enum pattern_match_result; the numbers above are
 * presumably the values of its enumerators -- confirm.
 */
enum pattern_match_result path_matches_pattern_list(
				const char *pathname, int pathlen,
				const char *basename, int *dtype,
				struct pattern_list *pl,
				struct index_state *istate)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably loads the sparse-checkout patterns into "istate" if they
 * are not loaded yet -- confirm the return convention.
 */
int init_sparse_checkout_patterns(struct index_state *istate)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Shared helper for the two public functions below, which differ in the
 * value they are expected to pass as "require_cone_mode" -- confirm.
 */
static int path_in_sparse_checkout_1(const char *path,
				     struct index_state *istate,
				     int require_cone_mode)
{}

/*
 * NOTE(review): the body is empty as shown. Presumably non-zero when
 * "path" is inside the sparse-checkout definition -- confirm.
 */
int path_in_sparse_checkout(const char *path,
			    struct index_state *istate)
{}

/*
 * NOTE(review): the body is empty as shown. Presumably like
 * path_in_sparse_checkout(), restricted to cone-mode patterns -- confirm.
 */
int path_in_cone_mode_sparse_checkout(const char *path,
				     struct index_state *istate)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably applies last_matching_pattern_from_list() to every pattern
 * list held by "dir", in precedence order -- confirm.
 */
static struct path_pattern *last_matching_pattern_from_lists(
		struct dir_struct *dir, struct index_state *istate,
		const char *pathname, int pathlen,
		const char *basename, int *dtype_p)
{}

/*
 * Loads the per-directory exclude list for the substring of base
 * which has a char length of baselen.
 *
 * NOTE(review): empty body as shown, so nothing is loaded.
 */
static void prep_exclude(struct dir_struct *dir,
			 struct index_state *istate,
			 const char *base, int baselen)
{}

/*
 * Loads the exclude lists for the directory containing pathname, then
 * scans all exclude lists to determine whether pathname is excluded.
 * Returns the exclude_list element which matched, or NULL for
 * undecided.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
struct path_pattern *last_matching_pattern(struct dir_struct *dir,
				      struct index_state *istate,
				      const char *pathname,
				      int *dtype_p)
{}

/*
 * Loads the exclude lists for the directory containing pathname, then
 * scans all exclude lists to determine whether pathname is excluded.
 * Returns 1 if true, otherwise 0.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
int is_excluded(struct dir_struct *dir, struct index_state *istate,
		const char *pathname, int *dtype_p)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably allocates a dir_entry holding a copy of the "len"-byte
 * "pathname" -- confirm who frees it.
 */
static struct dir_entry *dir_entry_new(const char *pathname, int len)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably appends "pathname" to the list of untracked entries in
 * "dir" -- confirm when NULL is returned.
 */
static struct dir_entry *dir_add_name(struct dir_struct *dir,
				      struct index_state *istate,
				      const char *pathname, int len)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably appends "pathname" to the list of ignored entries in
 * "dir" -- confirm when NULL is returned.
 */
struct dir_entry *dir_add_ignored(struct dir_struct *dir,
				  struct index_state *istate,
				  const char *pathname, int len)
{}

/*
 * Result of looking a directory name up in the index; returned by
 * directory_exists_in_index() and directory_exists_in_index_icase().
 * index_nonexistent is zero so that the result can be tested for
 * "found anything" in a boolean context.
 */
enum exist_status {
	index_nonexistent = 0,	/* nothing in the index under that name */
	index_directory,	/* the index has entries inside that directory */
	index_gitlink		/* the name itself is a gitlink entry */
};

/*
 * Do not use the alphabetically sorted index to look up
 * the directory name; instead, use the case insensitive
 * directory hash.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
static enum exist_status directory_exists_in_index_icase(struct index_state *istate,
							 const char *dirname, int len)
{}

/*
 * The index sorts alphabetically by entry name, which
 * means that a gitlink sorts as '\0' at the end, while
 * a directory (which is defined not as an entry, but as
 * the files it contains) will sort with the '/' at the
 * end.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably this is the case-sensitive counterpart of
 * directory_exists_in_index_icase() -- confirm how the two are selected.
 */
static enum exist_status directory_exists_in_index(struct index_state *istate,
						   const char *dirname, int len)
{}

/*
 * When we find a directory when traversing the filesystem, we
 * have three distinct cases:
 *
 *  - ignore it
 *  - see it as a directory
 *  - recurse into it
 *
 * and which one we choose depends on a combination of existing
 * git index contents and the flags passed into the directory
 * traversal routine.
 *
 * Case 1: If we *already* have entries in the index under that
 * directory name, we always recurse into the directory to see
 * all the files.
 *
 * Case 2: If we *already* have that directory name as a gitlink,
 * we always continue to see it as a gitlink, regardless of whether
 * there is an actual git directory there or not (it might not
 * be checked out as a subproject!)
 *
 * Case 3: if we didn't have it in the index previously, we
 * have a few sub-cases:
 *
 *  (a) if DIR_SHOW_OTHER_DIRECTORIES flag is set, we show it as
 *      just a directory, unless DIR_HIDE_EMPTY_DIRECTORIES is
 *      also true, in which case we need to check if it contains any
 *      untracked and / or ignored files.
 *  (b) if it looks like a git directory and we don't have the
 *      DIR_NO_GITLINKS flag, then we treat it as a gitlink, and
 *      show it as a directory.
 *  (c) otherwise, we recurse into it.
 *
 * NOTE(review): the body is empty as shown, so none of the cases above
 * is implemented and no value is returned. How "baselen" relates to
 * "len", and what "excluded" encodes, is not visible here -- confirm.
 */
static enum path_treatment treat_directory(struct dir_struct *dir,
	struct index_state *istate,
	struct untracked_cache_dir *untracked,
	const char *dirname, int len, int baselen, int excluded,
	const struct pathspec *pathspec)
{}

/*
 * This is an inexact early pruning of any recursive directory
 * reading - if the path cannot possibly be in the pathspec,
 * return true, and we'll skip it early.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
static int simplify_away(const char *path, int pathlen,
			 const struct pathspec *pathspec)
{}

/*
 * This function tells us whether an excluded path matches a
 * list of "interesting" pathspecs. That is, whether a path matched
 * by any of the pathspecs could possibly be ignored by excluding
 * the specified path. This can happen if:
 *
 *   1. the path is mentioned explicitly in the pathspec
 *
 *   2. the path is a directory prefix of some element in the
 *      pathspec
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
static int exclude_matches_pathspec(const char *path, int pathlen,
				    const struct pathspec *pathspec)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably derives a d_type-style value for "path" from what the index
 * knows about it -- confirm the value used for "unknown".
 */
static int get_index_dtype(struct index_state *istate,
			   const char *path, int len)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably returns the d_type of entry "e", falling back to a stat
 * call on "path" when the dirent does not carry it; "follow_symlink"
 * presumably selects stat() over lstat() -- confirm.
 */
unsigned char get_dtype(struct dirent *e, struct strbuf *path,
			int follow_symlink)
{}

/*
 * Defined here, forward-declared near the top of the file.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably returns "dtype" unchanged when it is already known and
 * otherwise resolves it via the index and the filesystem -- confirm.
 */
static int resolve_dtype(int dtype, struct index_state *istate,
			 const char *path, int len)
{}

/*
 * Per-entry classification and the cached-directory iteration wrappers.
 *
 * NOTE(review): every body below is empty as shown; the non-void
 * functions therefore return no value. Descriptions are inferred from
 * names and signatures only and must be confirmed.
 */

/*
 * Presumably the shortcut used when the entry comes from the untracked
 * cache rather than from the filesystem -- confirm.
 */
static enum path_treatment treat_path_fast(struct dir_struct *dir,
					   struct cached_dir *cdir,
					   struct index_state *istate,
					   struct strbuf *path,
					   int baselen,
					   const struct pathspec *pathspec)
{}

/*
 * Presumably decides how the current entry of "cdir" is to be treated,
 * leaving its full name in "path" (whose first "baselen" bytes are the
 * containing directory) -- confirm.
 */
static enum path_treatment treat_path(struct dir_struct *dir,
				      struct untracked_cache_dir *untracked,
				      struct cached_dir *cdir,
				      struct index_state *istate,
				      struct strbuf *path,
				      int baselen,
				      const struct pathspec *pathspec)
{}

/* Presumably records "name" as an untracked entry of "dir" -- confirm. */
static void add_untracked(struct untracked_cache_dir *dir, const char *name)
{}

/*
 * Presumably non-zero when the cached data in "untracked" can still be
 * trusted for the directory "path" -- confirm.
 */
static int valid_cached_dir(struct dir_struct *dir,
			    struct untracked_cache_dir *untracked,
			    struct index_state *istate,
			    struct strbuf *path,
			    int check_only)
{}

/*
 * opendir() counterpart of the cached_dir wrappers. Presumably prepares
 * "cdir" to iterate over either the cache or the real directory
 * -- confirm the return convention.
 */
static int open_cached_dir(struct cached_dir *cdir,
			   struct dir_struct *dir,
			   struct untracked_cache_dir *untracked,
			   struct index_state *istate,
			   struct strbuf *path,
			   int check_only)
{}

/*
 * readdir() counterpart of the cached_dir wrappers. Presumably advances
 * "cdir" to its next entry -- confirm which value signals the end.
 */
static int read_cached_dir(struct cached_dir *cdir)
{}

/*
 * closedir() counterpart of the cached_dir wrappers. Presumably releases
 * whatever open_cached_dir() acquired -- confirm.
 */
static void close_cached_dir(struct cached_dir *cdir)
{}

/*
 * Presumably files "path" under the untracked or ignored results of
 * "dir" according to "state" -- confirm.
 */
static void add_path_to_appropriate_result_list(struct dir_struct *dir,
	struct untracked_cache_dir *untracked,
	struct cached_dir *cdir,
	struct index_state *istate,
	struct strbuf *path,
	int baselen,
	const struct pathspec *pathspec,
	enum path_treatment state)
{}

/*
 * Read a directory tree. We currently ignore anything but
 * directories, regular files and symlinks. That's because git
 * doesn't handle them at all yet. Maybe that will change some
 * day.
 *
 * Also, we ignore the name ".git" (even if it is not a directory).
 * That likely will not change.
 *
 * If 'stop_at_first_file' is specified, 'path_excluded' is returned
 * to signal that a file was found. This is the least significant value that
 * indicates that a file was encountered that does not depend on the order of
 * whether an untracked or excluded path was encountered first.
 *
 * Returns the most significant path_treatment value encountered in the scan.
 * If 'stop_at_first_file' is specified, `path_excluded` is the most
 * significant path_treatment value that will be returned.
 *
 * NOTE(review): the body is empty as shown, so nothing is scanned and no
 * value is returned. The parameters are named "base"/"baselen" here but
 * "path"/"len" in the forward declaration near the top of the file.
 */

static enum path_treatment read_directory_recursive(struct dir_struct *dir,
	struct index_state *istate, const char *base, int baselen,
	struct untracked_cache_dir *untracked, int check_only,
	int stop_at_first_file, const struct pathspec *pathspec)
{}

/*
 * Takes two untyped pointers and returns int, i.e. the shape of a
 * qsort()-style comparison callback.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably orders dir_entry items by name -- confirm what "p1"/"p2"
 * actually point to.
 */
int cmp_dir_entry(const void *p1, const void *p2)
{}

/*
 * check if *out lexically strictly contains *in
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably checks each leading directory component of the "len"-byte
 * "path" before the traversal starts, returning non-zero when the
 * traversal should proceed -- confirm.
 */
static int treat_leading_path(struct dir_struct *dir,
			      struct index_state *istate,
			      const char *path, int len,
			      const struct pathspec *pathspec)
{}

static const char *get_ident_string(void)
{}

static int ident_in_untracked(const struct untracked_cache *uc)
{}

static void set_untracked_ident(struct untracked_cache *uc)
{}

static unsigned new_untracked_cache_flags(struct index_state *istate)
{}

static void new_untracked_cache(struct index_state *istate, int flags)
{}

void add_untracked_cache(struct index_state *istate)
{}

void remove_untracked_cache(struct index_state *istate)
{}

static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
						      int base_len,
						      const struct pathspec *pathspec,
						      struct index_state *istate)
{}

static void emit_traversal_statistics(struct dir_struct *dir,
				      struct repository *repo,
				      const char *path,
				      int path_len)
{}

int read_directory(struct dir_struct *dir, struct index_state *istate,
		   const char *path, int len, const struct pathspec *pathspec)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably non-zero when something exists at path "f"; whether
 * symbolic links are followed is not visible -- confirm.
 */
int file_exists(const char *f)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably like file_exists(), with "path" interpreted in the context
 * of "repo" -- confirm.
 */
int repo_file_exists(struct repository *repo, const char *path)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably compares two characters, folding case when the repository
 * is configured to ignore case -- confirm.
 */
static int cmp_icase(char a, char b)
{}

/*
 * Given two normalized paths (a trailing slash is ok), if subdir is
 * outside dir, return -1.  Otherwise return the offset in subdir that
 * can be used as relative path to dir.
 *
 * NOTE(review): the body is empty as shown, so no value is returned.
 */
int dir_inside_of(const char *subdir, const char *dir)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably non-zero when the current working directory is inside
 * "dir" -- confirm.
 */
int is_inside_dir(const char *dir)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably non-zero when the directory "path" has no entries; the
 * result for a path that cannot be opened is not visible -- confirm.
 */
int is_empty_dir(const char *path)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably derives a directory name from the URL "repo", with
 * "is_bundle" and "is_bare" changing which suffix is stripped or added;
 * the char * result is presumably owned by the caller -- confirm.
 */
char *git_url_basename(const char *repo, int is_bundle, int is_bare)
{}

/*
 * "dir" is not const, so the string can be edited in place.
 *
 * NOTE(review): empty body as shown. Presumably removes trailing
 * directory separators from "dir" -- confirm.
 */
void strip_dir_trailing_slashes(char *dir)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably the recursive worker behind remove_dir_recursively();
 * "kept_up" is a pointer and is presumably used to tell the caller that
 * something below "path" was kept -- confirm.
 */
static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably removes the directory "path" and everything below it,
 * subject to "flag" -- confirm the return convention.
 */
int remove_dir_recursively(struct strbuf *path, int flag)
{}

/*
 * Uses the GIT_PATH_FUNC macro to define a static function named
 * git_path_info_exclude, associated with "info/exclude".
 *
 * NOTE(review): presumably the generated function returns the path of
 * that file inside the git directory -- confirm against the macro.
 */
static GIT_PATH_FUNC(git_path_info_exclude, "info/exclude")

/*
 * NOTE(review): empty body as shown. Presumably adds the standard
 * exclude sources (such as core.excludesfile and info/exclude) to "dir"
 * -- confirm.
 */
void setup_standard_excludes(struct dir_struct *dir)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably returns the path of the sparse-checkout file; the char *
 * result is presumably owned by the caller -- confirm.
 */
char *get_sparse_checkout_filename(void)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably loads the sparse-checkout file into "pl" -- confirm the
 * return convention.
 */
int get_sparse_checkout_patterns(struct pattern_list *pl)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably removes the file "name"; whether emptied parent directories
 * are removed as well is not visible -- confirm.
 */
int remove_path(const char *name)
{}

/*
 * Frees memory within dir which was allocated, and resets fields for further
 * use.  Does not free dir itself.
 *
 * NOTE(review): empty body as shown, so nothing is freed or reset.
 */
void dir_clear(struct dir_struct *dir)
{}

struct ondisk_untracked_cache {};

#define ouc_offset(x)

struct write_data {};

static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
{}

static void write_one_dir(struct untracked_cache_dir *untracked,
			  struct write_data *wd)
{}

void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
{}

static void free_untracked(struct untracked_cache_dir *ucd)
{}

void free_untracked_cache(struct untracked_cache *uc)
{}

struct read_data {};

static void stat_data_from_disk(struct stat_data *to, const unsigned char *data)
{}

static int read_one_dir(struct untracked_cache_dir **untracked_,
			struct read_data *rd)
{}

/*
 * The three functions below share the signature (size_t pos, void *cb),
 * i.e. the shape of a per-position callback with an opaque context.
 *
 * NOTE(review): all bodies are empty as shown. Presumably they are
 * invoked for each set bit of a bitmap while reading the untracked
 * cache extension, with "cb" pointing at the reader state -- confirm.
 */
static void set_check_only(size_t pos, void *cb)
{}

static void read_stat(size_t pos, void *cb)
{}

static void read_oid(size_t pos, void *cb)
{}

/*
 * NOTE(review): empty body as shown. Presumably fills "oid_stat" from
 * the serialized stat data at "data" and the raw object name at "sha1"
 * -- confirm.
 */
static void load_oid_stat(struct oid_stat *oid_stat, const unsigned char *data,
			  const unsigned char *sha1)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably parses the "sz"-byte extension at "data" into a newly
 * allocated untracked cache and returns NULL when the data is malformed
 * -- confirm.
 */
struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
{}

/*
 * NOTE(review): empty body as shown. Presumably invalidates the single
 * directory "ucd" without touching its ancestors -- confirm.
 */
static void invalidate_one_directory(struct untracked_cache *uc,
				     struct untracked_cache_dir *ucd)
{}

/*
 * Normally when an entry is added or removed from a directory,
 * invalidating that directory is enough. No need to touch its
 * ancestors. When a directory is shown as "foo/bar/" in git-status
 * however, deleting or adding an entry may have cascading effect.
 *
 * Say the "foo/bar/file" has become untracked, we need to tell the
 * untracked_cache_dir of "foo" that "bar/" is not an untracked
 * directory any more (because "bar" is managed by foo as an untracked
 * "file").
 *
 * Similarly, if "foo/bar/file" moves from untracked to tracked and it
 * was the last untracked entry in the entire "foo", we should show
 * "foo/" instead. Which means we have to invalidate past "bar" up to
 * "foo".
 *
 * This function traverses all directories from root to leaf. If there
 * is a chance of one of the above cases happening, we invalidate back
 * to root. Otherwise we just invalidate the leaf. There may be a more
 * sophisticated way than checking for SHOW_OTHER_DIRECTORIES to
 * detect these cases and avoid unnecessary invalidation, for example,
 * checking for the untracked entry named "bar/" in "foo", but for now
 * stick to something safe and simple.
 *
 * NOTE(review): the body is empty as shown, so nothing is invalidated
 * and no value is returned; the meaning of the return value is not
 * visible here -- confirm.
 */
static int invalidate_one_component(struct untracked_cache *uc,
				    struct untracked_cache_dir *dir,
				    const char *path, int len)
{}

/*
 * NOTE(review): empty body as shown. Presumably invalidates the
 * untracked-cache data for "path" in "istate"; "safe_path" presumably
 * says whether "path" has already been verified -- confirm.
 */
void untracked_cache_invalidate_path(struct index_state *istate,
				     const char *path, int safe_path)
{}

/*
 * NOTE(review): empty body as shown. Presumably like
 * untracked_cache_invalidate_path(), after trimming "path" (for example
 * a trailing slash) -- confirm.
 */
void untracked_cache_invalidate_trimmed_path(struct index_state *istate,
					     const char *path,
					     int safe_path)
{}

/*
 * NOTE(review): empty body as shown. Presumably the hook called when
 * "path" is removed from the index -- confirm.
 */
void untracked_cache_remove_from_index(struct index_state *istate,
				       const char *path)
{}

/*
 * NOTE(review): empty body as shown. Presumably the hook called when
 * "path" is added to the index -- confirm.
 */
void untracked_cache_add_to_index(struct index_state *istate,
				  const char *path)
{}

/*
 * NOTE(review): empty body as shown. Presumably repeats the
 * work-tree/git-dir connection for submodules nested inside
 * "sub_worktree" -- confirm.
 */
static void connect_wt_gitdir_in_nested(const char *sub_worktree,
					const char *sub_gitdir)
{}

/*
 * NOTE(review): empty body as shown. Presumably makes the work tree
 * "work_tree_" and the git directory "git_dir_" point at each other,
 * descending into nested submodules when "recurse_into_nested" is
 * non-zero -- confirm.
 */
void connect_work_tree_and_git_dir(const char *work_tree_,
				   const char *git_dir_,
				   int recurse_into_nested)
{}

/*
 * Migrate the git directory of the given path from old_git_dir to new_git_dir.
 *
 * NOTE(review): empty body as shown, so nothing is moved.
 */
void relocate_gitdir(const char *path, const char *old_git_dir, const char *new_git_dir)
{}

/*
 * NOTE(review): the body is empty as shown, so no value is returned.
 * Presumably tests "str" against the path classes selected by "flags"
 * and returns non-zero on a match -- confirm.
 */
int path_match_flags(const char *const str, const enum path_match_flags flags)
{}