namei.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Some corrections by tytso.
 */

/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 * lookup logic.
 */
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 */

#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/wordpart.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
#include <linux/bitops.h>
#include <linux/init_task.h>
#include <linux/uaccess.h>

#include "internal.h"
#include "mount.h"

/* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
 * were necessary because of omirr.  The reason is that omirr needs
 * to know the _real_ pathname, not the user-supplied one, in case
 * of symlinks (and also when transname replacements occur).
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains).  It does
 * this with calls to <fs>_follow_link().
 * As a side effect, dir_namei(), _namei() and follow_link() are now 
 * replaced with a single function lookup_dentry() that can handle all 
 * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive.  As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
 *
 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 * resolution to correspond with current state of the code.
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOs, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
 *
 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 * During the 2.4 we need to fix the userland stuff depending on it -
 * hopefully we will be able to get rid of that wart in 2.5. So far only
 * XEmacs seems to be relying on it...
 */
/*
 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
 * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
 * any extra contention...
 */

/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them..
 *
 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 * PATH_MAX includes the nul terminator --RR.
 */

#define EMBEDDED_NAME_MAX …

struct filename *
getname_flags(const char __user *filename, int flags)
{ … }

struct filename *
getname_uflags(const char __user *filename, int uflags)
{ … }

struct filename *
getname(const char __user * filename)
{ … }

struct filename *
getname_kernel(const char * filename)
{ … }
EXPORT_SYMBOL(…);

void putname(struct filename *name)
{ … }
EXPORT_SYMBOL(…);

/**
 * check_acl - perform ACL permission checking
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	inode to check permissions on
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
 *
 * This function performs the ACL permission checking. Since this function
 * retrieve POSIX acls it needs to know whether it is called from a blocking or
 * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
static int check_acl(struct mnt_idmap *idmap,
		     struct inode *inode, int mask)
{ … }

/**
 * acl_permission_check - perform basic UNIX permission checking
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	inode to check permissions on
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
 *
 * This function performs the basic UNIX permission checking. Since this
 * function may retrieve POSIX acls it needs to know whether it is called from a
 * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
static int acl_permission_check(struct mnt_idmap *idmap,
				struct inode *inode, int mask)
{ … }

/**
 * generic_permission -  check for access rights on a Posix-like filesystem
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	inode to check access rights for
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
 *		%MAY_NOT_BLOCK ...)
 *
 * Used to check for read/write/execute permissions on a file.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
 * are used for other things.
 *
 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 * request cannot be satisfied (eg. requires blocking or too much complexity).
 * It would then be called again in ref-walk mode.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
		       int mask)
{ … }
EXPORT_SYMBOL(…);

/**
 * do_inode_permission - UNIX permission checking
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	inode to check permissions on
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
 *
 * We _really_ want to just do "generic_permission()" without
 * even looking at the inode->i_op values. So we keep a cache
 * flag in inode->i_opflags, that says "this has not special
 * permission function, use the fast case".
 */
static inline int do_inode_permission(struct mnt_idmap *idmap,
				      struct inode *inode, int mask)
{ … }

/**
 * sb_permission - Check superblock-level permissions
 * @sb: Superblock of inode to check permission on
 * @inode: Inode to check permission on
 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
 * Separate out file-system wide checks from inode-specific permission checks.
 */
static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
{ … }

/**
 * inode_permission - Check for access rights to a given inode
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	Inode to check permission on
 * @mask:	Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
 * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
 * this, letting us set arbitrary permissions for filesystem access without
 * changing the "normal" UIDs which are used for other things.
 *
 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
 */
int inode_permission(struct mnt_idmap *idmap,
		     struct inode *inode, int mask)
{ … }
EXPORT_SYMBOL(…);

/**
 * path_get - get a reference to a path
 * @path: path to get the reference to
 *
 * Given a path increment the reference count to the dentry and the vfsmount.
 */
void path_get(const struct path *path)
{ … }
EXPORT_SYMBOL(…);

/**
 * path_put - put a reference to a path
 * @path: path to put the reference to
 *
 * Given a path decrement the reference count to the dentry and the vfsmount.
 */
void path_put(const struct path *path)
{ … }
EXPORT_SYMBOL(…);

#define EMBEDDED_LEVELS …
struct nameidata { … } __randomize_layout;

#define ND_ROOT_PRESET …
#define ND_ROOT_GRABBED …
#define ND_JUMPED …

static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
{ … }

static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name,
			  const struct path *root)
{ … }

static void restore_nameidata(void)
{ … }

static bool nd_alloc_stack(struct nameidata *nd)
{ … }

/**
 * path_connected - Verify that a dentry is below mnt.mnt_root
 * @mnt: The mountpoint to check.
 * @dentry: The dentry to check.
 *
 * Rename can sometimes move a file or directory outside of a bind
 * mount, path_connected allows those cases to be detected.
 */
static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
{ … }

static void drop_links(struct nameidata *nd)
{ … }

static void leave_rcu(struct nameidata *nd)
{ … }

static void terminate_walk(struct nameidata *nd)
{ … }

/* path_put is needed afterwards regardless of success or failure */
static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
{ … }

static inline bool legitimize_path(struct nameidata *nd,
			    struct path *path, unsigned seq)
{ … }

static bool legitimize_links(struct nameidata *nd)
{ … }

static bool legitimize_root(struct nameidata *nd)
{ … }

/*
 * Path walking has 2 modes, rcu-walk and ref-walk (see
 * Documentation/filesystems/path-lookup.txt).  In situations when we can't
 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
 * normal reference counts on dentries and vfsmounts to transition to ref-walk
 * mode.  Refcounts are grabbed at the last known good point before rcu-walk
 * got stuck, so ref-walk may continue from there. If this is not successful
 * (eg. a seqcount has changed), then failure is returned and it's up to caller
 * to restart the path walk from the beginning in ref-walk mode.
 */

/**
 * try_to_unlazy - try to switch to ref-walk mode.
 * @nd: nameidata pathwalk data
 * Returns: true on success, false on failure
 *
 * try_to_unlazy attempts to legitimize the current nd->path and nd->root
 * for ref-walk mode.
 * Must be called from rcu-walk context.
 * Nothing should touch nameidata between try_to_unlazy() failure and
 * terminate_walk().
 */
static bool try_to_unlazy(struct nameidata *nd)
{ … }

/**
 * try_to_unlazy_next - try to switch to ref-walk mode.
 * @nd: nameidata pathwalk data
 * @dentry: next dentry to step into
 * Returns: true on success, false on failure
 *
 * Similar to try_to_unlazy(), but here we have the next dentry already
 * picked by rcu-walk and want to legitimize that in addition to the current
 * nd->path and nd->root for ref-walk mode.  Must be called from rcu-walk context.
 * Nothing should touch nameidata between try_to_unlazy_next() failure and
 * terminate_walk().
 */
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
{ … }

static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
{ … }

/**
 * complete_walk - successful completion of path walk
 * @nd:  pointer nameidata
 *
 * If we had been in RCU mode, drop out of it and legitimize nd->path.
 * Revalidate the final result, unless we'd already done that during
 * the path walk or the filesystem doesn't ask for it.  Return 0 on
 * success, -error on failure.  In case of failure caller does not
 * need to drop nd->path.
 */
static int complete_walk(struct nameidata *nd)
{ … }

static int set_root(struct nameidata *nd)
{ … }

static int nd_jump_root(struct nameidata *nd)
{ … }

/*
 * Helper to directly jump to a known parsed path from ->get_link,
 * caller must have taken a reference to path beforehand.
 */
int nd_jump_link(const struct path *path)
{ … }

static inline void put_link(struct nameidata *nd)
{ … }

static int sysctl_protected_symlinks __read_mostly;
static int sysctl_protected_hardlinks __read_mostly;
static int sysctl_protected_fifos __read_mostly;
static int sysctl_protected_regular __read_mostly;

#ifdef CONFIG_SYSCTL
static struct ctl_table namei_sysctls[] = …;

static int __init init_fs_namei_sysctls(void)
{ … }
fs_initcall(init_fs_namei_sysctls);

#endif /* CONFIG_SYSCTL */

/**
 * may_follow_link - Check symlink following for unsafe situations
 * @nd: nameidata pathwalk data
 * @inode: Used for idmapping.
 *
 * In the case of the sysctl_protected_symlinks sysctl being enabled,
 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
 * in a sticky world-writable directory. This is to protect privileged
 * processes from failing races against path names that may change out
 * from under them by way of other users creating malicious symlinks.
 * It will permit symlinks to be followed only when outside a sticky
 * world-writable directory, or when the uid of the symlink and follower
 * match, or when the directory owner matches the symlink's owner.
 *
 * Returns 0 if following the symlink is allowed, -ve on error.
 */
static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{ … }

/**
 * safe_hardlink_source - Check for safe hardlink conditions
 * @idmap: idmap of the mount the inode was found from
 * @inode: the source inode to hardlink from
 *
 * Return false if at least one of the following conditions:
 *    - inode is not a regular file
 *    - inode is setuid
 *    - inode is setgid and group-exec
 *    - access failure for read and write
 *
 * Otherwise returns true.
 */
static bool safe_hardlink_source(struct mnt_idmap *idmap,
				 struct inode *inode)
{ … }

/**
 * may_linkat - Check permissions for creating a hardlink
 * @idmap: idmap of the mount the inode was found from
 * @link:  the source to hardlink from
 *
 * Block hardlink when all of:
 *  - sysctl_protected_hardlinks enabled
 *  - fsuid does not match inode
 *  - hardlink source is unsafe (see safe_hardlink_source() above)
 *  - not CAP_FOWNER in a namespace with the inode owner uid mapped
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 *
 * Returns 0 if successful, -ve on error.
 */
int may_linkat(struct mnt_idmap *idmap, const struct path *link)
{ … }

/**
 * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
 *			  should be allowed, or not, on files that already
 *			  exist.
 * @idmap: idmap of the mount the inode was found from
 * @nd: nameidata pathwalk data
 * @inode: the inode of the file to open
 *
 * Block an O_CREAT open of a FIFO (or a regular file) when:
 *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
 *   - the file already exists
 *   - we are in a sticky directory
 *   - we don't own the file
 *   - the owner of the directory doesn't own the file
 *   - the directory is world writable
 * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
 * the directory doesn't have to be world writable: being group writable will
 * be enough.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 *
 * Returns 0 if the open is allowed, -ve on error.
 */
static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd,
				struct inode *const inode)
{ … }

/*
 * follow_up - Find the mountpoint of path's vfsmount
 *
 * Given a path, find the mountpoint of its source file system.
 * Replace @path with the path of the mountpoint in the parent mount.
 * Up is towards /.
 *
 * Return 1 if we went up a level and 0 if we were already at the
 * root.
 */
int follow_up(struct path *path)
{ … }
EXPORT_SYMBOL(…);

static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
				  struct path *path, unsigned *seqp)
{ … }

static bool choose_mountpoint(struct mount *m, const struct path *root,
			      struct path *path)
{ … }

/*
 * Perform an automount
 * - return -EISDIR to tell follow_managed() to stop and return the path we
 *   were called with.
 */
static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
{ … }

/*
 * mount traversal - out-of-line part.  One note on ->d_flags accesses -
 * dentries are pinned but not locked here, so negative dentry can go
 * positive right under us.  Use of smp_load_acquire() provides a barrier
 * sufficient for ->d_inode and ->d_flags consistency.
 */
static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
			     int *count, unsigned lookup_flags)
{ … }

static inline int traverse_mounts(struct path *path, bool *jumped,
				  int *count, unsigned lookup_flags)
{ … }

int follow_down_one(struct path *path)
{ … }
EXPORT_SYMBOL(…);

/*
 * Follow down to the covering mount currently visible to userspace.  At each
 * point, the filesystem owning that dentry may be queried as to whether the
 * caller is permitted to proceed or not.
 */
int follow_down(struct path *path, unsigned int flags)
{ … }
EXPORT_SYMBOL(…);

/*
 * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
 * we meet a managed dentry that would need blocking.
 */
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
{ … }

static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
			  struct path *path)
{ … }

/*
 * This looks up the name in dcache and possibly revalidates the found dentry.
 * NULL is returned if the dentry does not exist in the cache.
 */
static struct dentry *lookup_dcache(const struct qstr *name,
				    struct dentry *dir,
				    unsigned int flags)
{ … }

/*
 * Parent directory has inode locked exclusive.  This is one
 * and only case when ->lookup() gets called on non in-lookup
 * dentries - as the matter of fact, this only gets called
 * when directory is guaranteed to have no in-lookup children
 * at all.
 */
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
				    struct dentry *base,
				    unsigned int flags)
{ … }
EXPORT_SYMBOL(…);

static struct dentry *lookup_fast(struct nameidata *nd)
{ … }

/* Fast lookup failed, do it the slow way */
static struct dentry *__lookup_slow(const struct qstr *name,
				    struct dentry *dir,
				    unsigned int flags)
{ … }

static struct dentry *lookup_slow(const struct qstr *name,
				  struct dentry *dir,
				  unsigned int flags)
{ … }

static inline int may_lookup(struct mnt_idmap *idmap,
			     struct nameidata *restrict nd)
{ … }

static int reserve_stack(struct nameidata *nd, struct path *link)
{ … }

enum { … };

static const char *pick_link(struct nameidata *nd, struct path *link,
		     struct inode *inode, int flags)
{ … }

/*
 * Do we need to follow links? We _really_ want to be able
 * to do this check without having to look at inode->i_op,
 * so we keep a cache of "no, this doesn't need follow_link"
 * for the common case.
 *
 * NOTE: dentry must be what nd->next_seq had been sampled from.
 */
static const char *step_into(struct nameidata *nd, int flags,
		     struct dentry *dentry)
{ … }

static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
{ … }

static struct dentry *follow_dotdot(struct nameidata *nd)
{ … }

static const char *handle_dots(struct nameidata *nd, int type)
{ … }

static const char *walk_component(struct nameidata *nd, int flags)
{ … }

/*
 * We can do the critical dentry name comparison and hashing
 * operations one word at a time, but we are limited to:
 *
 * - Architectures with fast unaligned word accesses. We could
 *   do a "get_unaligned()" if this helps and is sufficiently
 *   fast.
 *
 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 *   do not trap on the (extremely unlikely) case of a page
 *   crossing operation.
 *
 * - Furthermore, we need an efficient 64-bit compile for the
 *   64-bit case in order to generate the "number of bytes in
 *   the final mask". Again, that could be replaced with a
 *   efficient population count instruction or similar.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>

#ifdef HASH_MIX

/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */

#elif defined(CONFIG_64BIT)
/*
 * Register pressure in the mixing function is an issue, particularly
 * on 32-bit x86, but almost any function requires one state value and
 * one temporary.  Instead, use a function designed for two state values
 * and no temporaries.
 *
 * This function cannot create a collision in only two iterations, so
 * we have two iterations to achieve avalanche.  In those two iterations,
 * we have six layers of mixing, which is enough to spread one bit's
 * influence out to 2^6 = 64 state bits.
 *
 * Rotate constants are scored by considering either 64 one-bit input
 * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
 * probability of that delta causing a change to each of the 128 output
 * bits, using a sample of random initial states.
 *
 * The Shannon entropy of the computed probabilities is then summed
 * to produce a score.  Ideally, any input change has a 50% chance of
 * toggling any given output bit.
 *
 * Mixing scores (in bits) for (12,45):
 * Input delta: 1-bit      2-bit
 * 1 round:     713.3    42542.6
 * 2 rounds:   2753.7   140389.8
 * 3 rounds:   5954.1   233458.2
 * 4 rounds:   7862.6   256672.2
 * Perfect:    8192     258048
 *            (64*128) (64*63/2 * 128)
 */
#define HASH_MIX(x, y, a) …

/*
 * Fold two longs into one 32-bit hash value.  This must be fast, but
 * latency isn't quite as critical, as there is a fair bit of additional
 * work done before the hash value is used.
 */
static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{ … }

#else	/* 32-bit case */

/*
 * Mixing scores (in bits) for (7,20):
 * Input delta: 1-bit      2-bit
 * 1 round:     330.3     9201.6
 * 2 rounds:   1246.4    25475.4
 * 3 rounds:   1907.1    31295.1
 * 4 rounds:   2042.3    31718.6
 * Perfect:    2048      31744
 *            (32*64)   (32*31/2 * 64)
 */
#define HASH_MIX …

static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* Use arch-optimized multiply if one exists */
	return __hash_32(y ^ __hash_32(x));
}

#endif

/*
 * Return the hash of a string of known length.  This is carfully
 * designed to match hash_name(), which is the more critical function.
 * In particular, we must end by hashing a final word containing 0..7
 * payload bytes, to match the way that hash_name() iterates until it
 * finds the delimiter after the name.
 */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{ … }
EXPORT_SYMBOL(…);

/* Return the "hash_len" (hash and length) of a null-terminated string */
u64 hashlen_string(const void *salt, const char *name)
{ … }
EXPORT_SYMBOL(…);

/*
 * Calculate the length and hash of the path component, and
 * return the length as the result.
 */
static inline const char *hash_name(struct nameidata *nd,
				    const char *name,
				    unsigned long *lastword)
{ … }

/*
 * Note that the 'last' word is always zero-masked, but
 * was loaded as a possibly big-endian word.
 */
#ifdef __BIG_ENDIAN
  #define LAST_WORD_IS_DOT …
  #define LAST_WORD_IS_DOTDOT …
#endif

#else	/* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */

/* Return the hash of a string of known length */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long hash = init_name_hash(salt);
	while (len--)
		hash = partial_name_hash((unsigned char)*name++, hash);
	return end_name_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);

/* Return the "hash_len" (hash and length) of a null-terminated string */
u64 hashlen_string(const void *salt, const char *name)
{
	unsigned long hash = init_name_hash(salt);
	unsigned long len = 0, c;

	c = (unsigned char)*name;
	while (c) {
		len++;
		hash = partial_name_hash(c, hash);
		c = (unsigned char)name[len];
	}
	return hashlen_create(end_name_hash(hash), len);
}
EXPORT_SYMBOL(hashlen_string);

/*
 * We know there's a real path component here of at least
 * one character.
 */
static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword)
{
	unsigned long hash = init_name_hash(nd->path.dentry);
	unsigned long len = 0, c, last = 0;

	c = (unsigned char)*name;
	do {
		last = (last << 8) + c;
		len++;
		hash = partial_name_hash(c, hash);
		c = (unsigned char)name[len];
	} while (c && c != '/');

	// This is reliable for DOT or DOTDOT, since the component
	// cannot contain NUL characters - top bits being zero means
	// we cannot have had any other pathnames.
	*lastword = last;
	nd->last.hash = end_name_hash(hash);
	nd->last.len = len;
	return name + len;
}

#endif

#ifndef LAST_WORD_IS_DOT
  #define LAST_WORD_IS_DOT …
  #define LAST_WORD_IS_DOTDOT …
#endif

/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{ … }

/* must be paired with terminate_walk() */
static const char *path_init(struct nameidata *nd, unsigned flags)
{ … }

static inline const char *lookup_last(struct nameidata *nd)
{ … }

static int handle_lookup_down(struct nameidata *nd)
{ … }

/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
{ … }

int filename_lookup(int dfd, struct filename *name, unsigned flags,
		    struct path *path, struct path *root)
{ … }

/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int path_parentat(struct nameidata *nd, unsigned flags,
				struct path *parent)
{ … }

/* Note: this does not consume "name" */
static int __filename_parentat(int dfd, struct filename *name,
			       unsigned int flags, struct path *parent,
			       struct qstr *last, int *type,
			       const struct path *root)
{ … }

static int filename_parentat(int dfd, struct filename *name,
			     unsigned int flags, struct path *parent,
			     struct qstr *last, int *type)
{ … }

/* does lookup, returns the object with parent locked */
static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path)
{ … }

struct dentry *kern_path_locked(const char *name, struct path *path)
{ … }

struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path)
{ … }
EXPORT_SYMBOL(…);

int kern_path(const char *name, unsigned int flags, struct path *path)
{ … }
EXPORT_SYMBOL(…);

/**
 * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair
 * @filename: filename structure
 * @flags: lookup flags
 * @parent: pointer to struct path to fill
 * @last: last component
 * @type: type of the last component
 * @root: pointer to struct path of the base directory
 */
int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
			   struct path *parent, struct qstr *last, int *type,
			   const struct path *root)
{ … }
EXPORT_SYMBOL(…);

/**
 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
 * @dentry:  pointer to dentry of the base directory
 * @mnt: pointer to vfs mount of the base directory
 * @name: pointer to file name
 * @flags: lookup flags
 * @path: pointer to struct path to fill
 */
int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
		    const char *name, unsigned int flags,
		    struct path *path)
{ … }
EXPORT_SYMBOL(…);

static int lookup_one_common(struct mnt_idmap *idmap,
			     const char *name, struct dentry *base, int len,
			     struct qstr *this)
{ … }

/**
 * try_lookup_one_len - filesystem helper to lookup single pathname component
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * Look up a dentry by name in the dcache, returning NULL if it does not
 * currently exist.  The function does not try to create a dentry.
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * The caller must hold base->i_mutex.
 */
struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

/**
 * lookup_one_len - filesystem helper to lookup single pathname component
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * The caller must hold base->i_mutex.
 */
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

/**
 * lookup_one - filesystem helper to lookup single pathname component
 * @idmap:	idmap of the mount the lookup is performed from
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * The caller must hold base->i_mutex.
 */
struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name,
			  struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

/**
 * lookup_one_unlocked - filesystem helper to lookup single pathname component
 * @idmap:	idmap of the mount the lookup is performed from
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * Unlike lookup_one_len, it should be called without the parent
 * i_mutex held, and will take the i_mutex itself if necessary.
 */
struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
				   const char *name, struct dentry *base,
				   int len)
{ … }
EXPORT_SYMBOL(…);

/**
 * lookup_one_positive_unlocked - filesystem helper to lookup single
 *				  pathname component
 * @idmap:	idmap of the mount the lookup is performed from
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
 * known positive or ERR_PTR(). This is what most of the users want.
 *
 * Note that pinned negative with unlocked parent _can_ become positive at any
 * time, so callers of lookup_one_unlocked() need to be very careful; pinned
 * positives have >d_inode stable, so this one avoids such problems.
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * The helper should be called without i_mutex held.
 */
struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
					    const char *name,
					    struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

/**
 * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	maximum length @len should be interpreted to
 *
 * Note that this routine is purely a helper for filesystem usage and should
 * not be called by generic code.
 *
 * Unlike lookup_one_len, it should be called without the parent
 * i_mutex held, and will take the i_mutex itself if necessary.
 */
struct dentry *lookup_one_len_unlocked(const char *name,
				       struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

/*
 * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
 * on negatives.  Returns known positive or ERR_PTR(); that's what
 * most of the users want.  Note that pinned negative with unlocked parent
 * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
 * need to be very careful; pinned positives have ->d_inode stable, so
 * this one avoids such problems.
 */
struct dentry *lookup_positive_unlocked(const char *name,
				       struct dentry *base, int len)
{ … }
EXPORT_SYMBOL(…);

#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{ … }
#endif

int user_path_at(int dfd, const char __user *name, unsigned flags,
		 struct path *path)
{ … }
EXPORT_SYMBOL(…);

int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
		   struct inode *inode)
{ … }
EXPORT_SYMBOL(…);

/*
 *	Check whether we can remove a link victim from directory dir, check
 *  whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
 *  6. If the victim is append-only or immutable we can't do antyhing with
 *     links pointing to it.
 *  7. If the victim has an unknown uid or gid we can't change the inode.
 *  8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 * 10. We can't remove a root or mountpoint.
 * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */
static int may_delete(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *victim, bool isdir)
{ … }

/*	Check whether we can create an object with dentry child in directory
 *  dir.
 *  1. We can't do it if child already exists (open has special treatment for
 *     this case, but since we are inlined it's OK)
 *  2. We can't do it if dir is read-only (done in permission())
 *  3. We can't do it if the fs can't represent the fsuid or fsgid.
 *  4. We should have write and exec permissions on dir
 *  5. We can't do it if dir is immutable (done in permission())
 */
static inline int may_create(struct mnt_idmap *idmap,
			     struct inode *dir, struct dentry *child)
{ … }

// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{ … }

/*
 * p1 and p2 should be directories on the same fs.
 */
struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
{ … }
EXPORT_SYMBOL(…);

/*
 * c1 and p2 should be on the same fs.
 */
struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
{ … }
EXPORT_SYMBOL(…);

void unlock_rename(struct dentry *p1, struct dentry *p2)
{ … }
EXPORT_SYMBOL(…);

/**
 * vfs_prepare_mode - prepare the mode to be used for a new inode
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	parent directory of the new inode
 * @mode:	mode of the new inode
 * @mask_perms:	allowed permission by the vfs
 * @type:	type of file to be created
 *
 * This helper consolidates and enforces vfs restrictions on the @mode of a new
 * object to be created.
 *
 * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
 * the kernel documentation for mode_strip_umask()). Moving umask stripping
 * after setgid stripping allows the same ordering for both non-POSIX ACL and
 * POSIX ACL supporting filesystems.
 *
 * Note that it's currently valid for @type to be 0 if a directory is created.
 * Filesystems raise that flag individually and we need to check whether each
 * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
 * non-zero type.
 *
 * Returns: mode to be passed to the filesystem
 */
static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
				       const struct inode *dir, umode_t mode,
				       umode_t mask_perms, umode_t type)
{ … }

/**
 * vfs_create - create new file
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child file
 * @mode:	mode of the child file
 * @want_excl:	whether the file must not yet exist
 *
 * Create a new file.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
	       struct dentry *dentry, umode_t mode, bool want_excl)
{ … }
EXPORT_SYMBOL(…);

int vfs_mkobj(struct dentry *dentry, umode_t mode,
		int (*f)(struct dentry *, umode_t, void *),
		void *arg)
{ … }
EXPORT_SYMBOL(…);

bool may_open_dev(const struct path *path)
{ … }

static int may_open(struct mnt_idmap *idmap, const struct path *path,
		    int acc_mode, int flag)
{ … }

static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
{ … }

static inline int open_to_namei_flags(int flag)
{ … }

static int may_o_create(struct mnt_idmap *idmap,
			const struct path *dir, struct dentry *dentry,
			umode_t mode)
{ … }

/*
 * Attempt to atomically look up, create and open a file from a negative
 * dentry.
 *
 * Returns 0 if successful.  The file will have been created and attached to
 * @file by the filesystem calling finish_open().
 *
 * If the file was looked up only or didn't need creating, FMODE_OPENED won't
 * be set.  The caller will need to perform the open themselves.  @path will
 * have been updated to point to the new dentry.  This may be negative.
 *
 * Returns an error code otherwise.
 */
static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
				  struct file *file,
				  int open_flag, umode_t mode)
{ … }

/*
 * Look up and maybe create and open the last component.
 *
 * Must be called with parent locked (exclusive in O_CREAT case).
 *
 * Returns 0 on success, that is, if
 *  the file was successfully atomically created (if necessary) and opened, or
 *  the file was not completely opened at this time, though lookups and
 *  creations were performed.
 * These case are distinguished by presence of FMODE_OPENED on file->f_mode.
 * In the latter case dentry returned in @path might be negative if O_CREAT
 * hadn't been specified.
 *
 * An error code is returned on failure.
 */
static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
				  const struct open_flags *op,
				  bool got_write)
{ … }

static const char *open_last_lookups(struct nameidata *nd,
		   struct file *file, const struct open_flags *op)
{ … }

/*
 * Handle the last step of open()
 */
static int do_open(struct nameidata *nd,
		   struct file *file, const struct open_flags *op)
{ … }

/**
 * vfs_tmpfile - create tmpfile
 * @idmap:	idmap of the mount the inode was found from
 * @parentpath:	pointer to the path of the base directory
 * @file:	file descriptor of the new tmpfile
 * @mode:	mode of the new tmpfile
 *
 * Create a temporary file.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_tmpfile(struct mnt_idmap *idmap,
		const struct path *parentpath,
		struct file *file, umode_t mode)
{ … }

/**
 * kernel_tmpfile_open - open a tmpfile for kernel internal use
 * @idmap:	idmap of the mount the inode was found from
 * @parentpath:	path of the base directory
 * @mode:	mode of the new tmpfile
 * @open_flag:	flags
 * @cred:	credentials for open
 *
 * Create and open a temporary file.  The file is not accounted in nr_files,
 * hence this is only for kernel internal use, and must not be installed into
 * file tables or such.
 */
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
				 const struct path *parentpath,
				 umode_t mode, int open_flag,
				 const struct cred *cred)
{ … }
EXPORT_SYMBOL(…);

static int do_tmpfile(struct nameidata *nd, unsigned flags,
		const struct open_flags *op,
		struct file *file)
{ … }

static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
{ … }

static struct file *path_openat(struct nameidata *nd,
			const struct open_flags *op, unsigned flags)
{ … }

struct file *do_filp_open(int dfd, struct filename *pathname,
		const struct open_flags *op)
{ … }

struct file *do_file_open_root(const struct path *root,
		const char *name, const struct open_flags *op)
{ … }

static struct dentry *filename_create(int dfd, struct filename *name,
				      struct path *path, unsigned int lookup_flags)
{ … }

struct dentry *kern_path_create(int dfd, const char *pathname,
				struct path *path, unsigned int lookup_flags)
{ … }
EXPORT_SYMBOL(…);

void done_path_create(struct path *path, struct dentry *dentry)
{ … }
EXPORT_SYMBOL(…);

inline struct dentry *user_path_create(int dfd, const char __user *pathname,
				struct path *path, unsigned int lookup_flags)
{ … }
EXPORT_SYMBOL(…);

/**
 * vfs_mknod - create device node or file
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child device node
 * @mode:	mode of the child device node
 * @dev:	device number of device to create
 *
 * Create a device node or file.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
	      struct dentry *dentry, umode_t mode, dev_t dev)
{ … }
EXPORT_SYMBOL(…);

static int may_mknod(umode_t mode)
{ … }

static int do_mknodat(int dfd, struct filename *name, umode_t mode,
		unsigned int dev)
{ … }

SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
		unsigned int, dev)
{ … }

SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{ … }

/**
 * vfs_mkdir - create directory
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child directory
 * @mode:	mode of the child directory
 *
 * Create a directory.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
	      struct dentry *dentry, umode_t mode)
{ … }
EXPORT_SYMBOL(…);

int do_mkdirat(int dfd, struct filename *name, umode_t mode)
{ … }

SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{ … }

SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{ … }

/**
 * vfs_rmdir - remove directory
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child directory
 *
 * Remove a directory.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
		     struct dentry *dentry)
{ … }
EXPORT_SYMBOL(…);

int do_rmdir(int dfd, struct filename *name)
{ … }

SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
{ … }

/**
 * vfs_unlink - unlink a filesystem object
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	parent directory
 * @dentry:	victim
 * @delegated_inode: returns victim inode, if the inode is delegated.
 *
 * The caller must hold dir->i_mutex.
 *
 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
 * return a reference to the inode in delegated_inode.  The caller
 * should then break the delegation on that inode and retry.  Because
 * breaking a delegation may take a long time, the caller should drop
 * dir->i_mutex before doing so.
 *
 * Alternatively, a caller may pass NULL for delegated_inode.  This may
 * be appropriate for callers that expect the underlying filesystem not
 * to be NFS exported.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
	       struct dentry *dentry, struct inode **delegated_inode)
{ … }
EXPORT_SYMBOL(…);

/*
 * Make sure that the actual truncation of the file will occur outside its
 * directory's i_mutex.  Truncate can take a long time if there is a lot of
 * writeout happening, and we don't want to prevent access to the directory
 * while waiting on the I/O.
 */
int do_unlinkat(int dfd, struct filename *name)
{ … }

SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
{ … }

SYSCALL_DEFINE1(unlink, const char __user *, pathname)
{ … }

/**
 * vfs_symlink - create symlink
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child symlink file
 * @oldname:	name of the file to link to
 *
 * Create a symlink.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
		struct dentry *dentry, const char *oldname)
{ … }
EXPORT_SYMBOL(…);

int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
{ … }

SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
		int, newdfd, const char __user *, newname)
{ … }

SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{ … }

/**
 * vfs_link - create a new link
 * @old_dentry:	object to be linked
 * @idmap:	idmap of the mount
 * @dir:	new parent
 * @new_dentry:	where to create the new link
 * @delegated_inode: returns inode needing a delegation break
 *
 * The caller must hold dir->i_mutex
 *
 * If vfs_link discovers a delegation on the to-be-linked file in need
 * of breaking, it will return -EWOULDBLOCK and return a reference to the
 * inode in delegated_inode.  The caller should then break the delegation
 * and retry.  Because breaking a delegation may take a long time, the
 * caller should drop the i_mutex before doing so.
 *
 * Alternatively, a caller may pass NULL for delegated_inode.  This may
 * be appropriate for callers that expect the underlying filesystem not
 * to be NFS exported.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
	     struct inode *dir, struct dentry *new_dentry,
	     struct inode **delegated_inode)
{ … }
EXPORT_SYMBOL(…);

/*
 * Hardlinks are often used in delicate situations.  We avoid
 * security-related surprises by not following symlinks on the
 * newname.  --KAB
 *
 * We don't follow them on the oldname either to be compatible
 * with linux 2.0, and to avoid hard-linking to directories
 * and other special files.  --ADM
 */
int do_linkat(int olddfd, struct filename *old, int newdfd,
	      struct filename *new, int flags)
{ … }

SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname, int, flags)
{ … }

SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{ … }

/**
 * vfs_rename - rename a filesystem object
 * @rd:		pointer to &struct renamedata info
 *
 * The caller must hold multiple mutexes--see lock_rename()).
 *
 * If vfs_rename discovers a delegation in need of breaking at either
 * the source or destination, it will return -EWOULDBLOCK and return a
 * reference to the inode in delegated_inode.  The caller should then
 * break the delegation and retry.  Because breaking a delegation may
 * take a long time, the caller should drop all locks before doing
 * so.
 *
 * Alternatively, a caller may pass NULL for delegated_inode.  This may
 * be appropriate for callers that expect the underlying filesystem not
 * to be NFS exported.
 *
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *
 *	a) we can get into loop creation.
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4BSD screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
 *	   story.
 *	c) we may have to lock up to _four_ objects - parents and victim (if it exists),
 *	   and source (if it's a non-directory or a subdirectory that moves to
 *	   different parent).
 *	   And that - after we got ->i_mutex on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_mutex _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
 *	   That works since everybody except rename does "lock parent, lookup,
 *	   lock child" and rename is under ->s_vfs_rename_mutex.
 *	   HOWEVER, it relies on the assumption that any object with ->lookup()
 *	   has no more than 1 dentry.  If "hybrid" objects will ever appear,
 *	   we'd better make sure that there's no link(2) for them.
 *	d) conversion from fhandle to dentry may come in the wrong moment - when
 *	   we are removing the target. Solution: we will have to grab ->i_mutex
 *	   in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *	   ->i_mutex on parents, which works but leads to some truly excessive
 *	   locking].
 */
int vfs_rename(struct renamedata *rd)
{ … }
EXPORT_SYMBOL(…);

int do_renameat2(int olddfd, struct filename *from, int newdfd,
		 struct filename *to, unsigned int flags)
{ … }

SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname, unsigned int, flags)
{ … }

SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname)
{ … }

SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{ … }

int readlink_copy(char __user *buffer, int buflen, const char *link)
{ … }

/**
 * vfs_readlink - copy symlink body into userspace buffer
 * @dentry: dentry on which to get symbolic link
 * @buffer: user memory pointer
 * @buflen: size of buffer
 *
 * Does not touch atime.  That's up to the caller if necessary
 *
 * Does not call security hook.
 */
int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{ … }
EXPORT_SYMBOL(…);

/**
 * vfs_get_link - get symlink body
 * @dentry: dentry on which to get symbolic link
 * @done: caller needs to free returned data with this
 *
 * Calls security hook and i_op->get_link() on the supplied inode.
 *
 * It does not touch atime.  That's up to the caller if necessary.
 *
 * Does not work on "special" symlinks like /proc/$$/fd/N
 */
const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
{ … }
EXPORT_SYMBOL(…);

/* get the link contents into pagecache */
const char *page_get_link(struct dentry *dentry, struct inode *inode,
			  struct delayed_call *callback)
{ … }

EXPORT_SYMBOL(…);

void page_put_link(void *arg)
{ … }
EXPORT_SYMBOL(…);

int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{ … }
EXPORT_SYMBOL(…);

int page_symlink(struct inode *inode, const char *symname, int len)
{ … }
EXPORT_SYMBOL(…);

const struct inode_operations page_symlink_inode_operations = …;
EXPORT_SYMBOL(…);