// SPDX-License-Identifier: GPL-2.0 /* * File operations used by nfsd. Some of these have been ripped from * other parts of the kernel because they weren't exported, others * are partial duplicates with added or changed functionality. * * Note that several functions dget() the dentry upon which they want * to act, most notably those that create directory entries. Response * dentry's are dput()'d if necessary in the release callback. * So if you notice code paths that apparently fail to dput() the * dentry, don't worry--they have been taken care of. * * Copyright (C) 1995-1999 Olaf Kirch <[email protected]> * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <[email protected]> */ #include <linux/fs.h> #include <linux/file.h> #include <linux/splice.h> #include <linux/falloc.h> #include <linux/fcntl.h> #include <linux/namei.h> #include <linux/delay.h> #include <linux/fsnotify.h> #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> #include <linux/jhash.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/security.h> #include "xdr3.h" #ifdef CONFIG_NFSD_V4 #include "../internal.h" #include "acl.h" #include "idmap.h" #include "xdr4.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" #include "vfs.h" #include "filecache.h" #include "trace.h" #define NFSDDBG_FACILITY … /** * nfserrno - Map Linux errnos to NFS errnos * @errno: POSIX(-ish) error code to be mapped * * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If * it's an error we don't expect, log it once and return nfserr_io. */ __be32 nfserrno (int errno) { … } /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. 
* Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged, * or nfs_ok having possibly changed *dpp and *expp */ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp) { … } static void follow_to_parent(struct path *path) { … } static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp) { … } /* * For nfsd purposes, we treat V4ROOT exports as though there was an * export at *every* directory. * We return: * '1' if this dentry *must* be an export point, * '2' if it might be, if there is really a mount here, and * '0' if there is no chance of an export point here. */ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) { … } __be32 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, unsigned int len, struct svc_export **exp_ret, struct dentry **dentry_ret) { … } /** * nfsd_lookup - look up a single path component for nfsd * * @rqstp: the request context * @fhp: the file handle of the directory * @name: the component name, or %NULL to look up parent * @len: length of name to examine * @resfh: pointer to pre-initialised filehandle to hold result. * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * * If the lookup would cross a mountpoint, and the mounted filesystem * is exported to the client with NFSEXP_NOHIDE, then the lookup is * accepted as it stands and the mounted directory is * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 * */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, unsigned int len, struct svc_fh *resfh) { … } static void commit_reset_write_verifier(struct nfsd_net *nn, struct svc_rqst *rqstp, int err) { … } /* * Commit metadata changes to stable storage. 
*/ static int commit_inode_metadata(struct inode *inode) { … } static int commit_metadata(struct svc_fh *fhp) { … } /* * Go over the attributes and take care of the small differences between * NFS semantics and what Linux expects. */ static void nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { … } static __be32 nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) { … } static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) { … } /** * nfsd_setattr - Set various file attributes. * @rqstp: controlling RPC transaction * @fhp: filehandle of target * @attr: attributes to set * @guardtime: do not act if ctime.tv_sec does not match this timestamp * * This call may adjust the contents of @attr (in particular, this * call may change the bits in the na_iattr.ia_valid field). * * Returns nfs_ok on success, otherwise an NFS status code is * returned. Caller must release @fhp by calling fh_put in either * case. */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs *attr, const struct timespec64 *guardtime) { … } #if defined(CONFIG_NFSD_V4) /* * NFS junction information is stored in an extended attribute. */ #define NFSD_JUNCTION_XATTR_NAME … /** * nfsd4_is_junction - Test if an object could be an NFS junction * * @dentry: object to test * * Returns 1 if "dentry" appears to contain NFS junction information. * Otherwise 0 is returned. 
*/ int nfsd4_is_junction(struct dentry *dentry) { … } static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) { … } __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, struct nfsd_file *nf_src, u64 src_pos, struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { … } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { … } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, loff_t len, int flags) { … } #endif /* defined(CONFIG_NFSD_V4) */ /* * Check server access rights to a file system object */ struct accessmap { … }; static struct accessmap nfs3_regaccess[] = …; static struct accessmap nfs3_diraccess[] = …; static struct accessmap nfs3_anyaccess[] = …; __be32 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) { … } int nfsd_open_break_lease(struct inode *inode, int access) { … } /* * Open an existing file or directory. * The may_flags argument indicates the type of open (read/write/lock) * and additional flags. * N.B. After this call fhp needs an fh_put */ static int __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { … } __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { … } /** * nfsd_open_verified - Open a regular file for the filecache * @rqstp: RPC request * @fhp: NFS filehandle of the file to open * @may_flags: internal permission flags * @filp: OUT: open "struct file *" * * Returns zero on success, or a negative errno value. */ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { … } /* * Grab and keep cached pages associated with a file in the svc_rqst * so that they can be passed to the network sendmsg routines * directly. They will be released after the sending has completed. 
* * Return values: Number of bytes consumed, or -EIO if there are no * remaining pages in rqstp->rq_pages. */ static int nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { … } static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { … } static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len, size_t expected) { … } static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof, ssize_t host_err) { … } /** * nfsd_splice_read - Perform a VFS read using a splice pipe * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @file: opened struct file of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @eof: OUT: set non-zero if operation reached the end of the file * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned. */ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof) { … } /** * nfsd_iter_read - Perform a VFS read using an iterator * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @file: opened struct file of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @base: offset in first page of read buffer * @eof: OUT: set non-zero if operation reached the end of the file * * Some filesystems or situations cannot use nfsd_splice_read. This * function is the slightly less-performant fallback for those cases. * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned. 
*/ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, unsigned int base, u32 *eof) { … } /* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write * from a client's biod). Rather than syncing the file with each write * request, we sleep for 10 msec. * * I don't know if this roughly approximates C. Juszak's idea of * gathered writes, but it's a nice and simple solution (IMHO), and it * seems to work:-) * * Note: we do this only in the NFSv2 case, since v3 and higher have a * better tool (separate unstable writes and commits) for solving this * problem. */ static int wait_for_concurrent_writes(struct file *file) { … } __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable, __be32 *verf) { … } /** * nfsd_read_splice_ok - check if spliced reading is supported * @rqstp: RPC transaction context * * Return values: * %true: nfsd_splice_read() may be used * %false: nfsd_splice_read() must not be used * * NFS READ normally uses splice to send data in-place. However the * data in cache can change after the reply's MIC is computed but * before the RPC reply is sent. To prevent the client from * rejecting the server-computed MIC in this somewhat rare case, do * not use splice with the GSS integrity and privacy services. */ bool nfsd_read_splice_ok(struct svc_rqst *rqstp) { … } /** * nfsd_read - Read data from a file * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @eof: OUT: set non-zero if operation reached the end of the file * * The caller must verify that there is enough space in @rqstp.rq_res * to perform this operation. * * N.B. 
After this call fhp needs an fh_put * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned. */ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long *count, u32 *eof) { … } /* * Write data to a file. * The stable flag requests synchronous writes. * N.B. After this call fhp needs an fh_put */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int stable, __be32 *verf) { … } /** * nfsd_commit - Commit pending writes to stable storage * @rqstp: RPC request being processed * @fhp: NFS filehandle * @nf: target file * @offset: raw offset from beginning of file * @count: raw count of bytes to sync * @verf: filled in with the server's current write verifier * * Note: we guarantee that data that lies within the range specified * by the 'offset' and 'count' parameters will be synced. The server * is permitted to sync data that lies outside this range at the * same time. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. * * Return values: * An nfsstat value in network byte order. */ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, u64 offset, u32 count, __be32 *verf) { … } /** * nfsd_create_setattr - Set a created file's attributes * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of parent directory * @resfhp: NFS filehandle of new object * @attrs: requested attributes of new object * * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_fh *resfhp, struct nfsd_attrs *attrs) { … } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. * setting size to 0 may fail for some specific file systems by the permission * checking which requires WRITE permission but the mode is 000. 
* we ignore the resizing (to 0) on the newly created file, since the size is * 0 after the file is created. * * call this only after vfs_create() is called. * */ static void nfsd_check_ignore_resizing(struct iattr *iap) { … } /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *resfhp) { … } /* * Create a filesystem object (regular, directory, special). * Note that the parent directory is left locked. * * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *resfhp) { … } /* * Read a symlink. On entry, *lenp must contain the maximum path length that * fits into the buffer. On return, it contains the true length. * N.B. After this call fhp needs an fh_put */ __be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { … } /** * nfsd_symlink - Create a symlink and look up its inode * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of parent directory * @fname: filename of the new symlink * @flen: length of @fname * @path: content of the new symlink (NUL-terminated) * @attrs: requested attributes of new object * @resfhp: NFS filehandle of new object * * N.B. After this call _both_ fhp and resfhp need an fh_put * * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, char *path, struct nfsd_attrs *attrs, struct svc_fh *resfhp) { … } /* * Create a hardlink * N.B. 
After this call _both_ ffhp and tfhp need an fh_put */ __be32 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp) { … } static void nfsd_close_cached_files(struct dentry *dentry) { … } static bool nfsd_has_cached_files(struct dentry *dentry) { … } /* * Rename a file * N.B. After this call _both_ ffhp and tfhp need an fh_put */ __be32 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct svc_fh *tfhp, char *tname, int tlen) { … } /* * Unlink a file or directory * N.B. After this call fhp needs an fh_put */ __be32 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, char *fname, int flen) { … } /* * We do this buffering because we must not call back into the file * system's ->lookup() method from the filldir callback. That may well * deadlock a number of file systems. * * This is based heavily on the implementation of the same in XFS. */ struct buffered_dirent { … }; struct readdir_data { … }; static bool nfsd_buffered_filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { … } static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, nfsd_filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { … } /** * nfsd_readdir - Read entries from a directory * @rqstp: RPC transaction context * @fhp: NFS file handle of directory to be read * @offsetp: OUT: seek offset of final entry that was read * @cdp: OUT: an eof error value * @func: entry filler actor * * This implementation ignores the NFSv3/4 verifier cookie. * * NB: normal system calls hold file->f_pos_lock when calling * ->iterate_shared and ->llseek, but nfsd_readdir() does not. * Because the struct file acquired here is not visible to other * threads, its internal state does not need mutex protection. * * Returns nfs_ok on success, otherwise an nfsstat code is * returned. 
*/ __be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct readdir_cd *cdp, nfsd_filldir_t func) { … } /** * nfsd_filp_close - close a file synchronously * @fp: the file to close * * nfsd_filp_close() is similar in behaviour to filp_close(). * The difference is that if this is the final close on the * file, then that finalisation happens immediately, rather than * being handed over to a work_queue, as is the case for * filp_close(). * When a user-space process closes a file (even when using * filp_close()) the finalisation happens before returning to * userspace, so it is effectively synchronous. When a kernel thread * uses filp_close(), on the other hand, the handling is completely * asynchronous. This means that any cost imposed by that finalisation * is not imposed on the nfsd thread, and nfsd could potentially * close files more quickly than the work queue finalises the close, * which would lead to unbounded growth in the queue. * * In some contexts it is not safe to synchronously wait for * close finalisation (see comment for __fput_sync()), but nfsd * does not match those contexts. In particular it does not, at the * time that this function is called, hold any locks and no finalisation * of any file, socket, or device driver would have any cause to wait * for nfsd to make progress. */ void nfsd_filp_close(struct file *fp) { … } /* * Get file system stats * N.B. After this call fhp needs an fh_put */ __be32 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) { … } static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp) { … } #ifdef CONFIG_NFSD_V4 /* * Helper function to translate error numbers. In the case of xattr operations, * some error codes need to be translated outside of the standard translations. * * ENODATA needs to be translated to nfserr_noxattr. * E2BIG to nfserr_xattr2big. * * Additionally, vfs_listxattr can return -ERANGE. 
This means that the * file has too many extended attributes to retrieve inside an * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation: * filesystems will allow the adding of extended attributes until they hit * their own internal limit. This limit may be larger than XATTR_LIST_MAX. * So, at that point, the attributes are present and valid, but can't * be retrieved using listxattr, since the upper level xattr code enforces * the XATTR_LIST_MAX limit. * * This bug means that we need to deal with listxattr returning -ERANGE. The * best mapping is to return TOOSMALL. */ static __be32 nfsd_xattr_errno(int err) { … } /* * Retrieve the specified user extended attribute. To avoid always * having to allocate the maximum size (since we are not getting * a maximum size from the RPC), do a probe + alloc. Hold a reader * lock on i_rwsem to prevent the extended attribute from changing * size while we're doing this. */ __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp) { … } /* * Retrieve the xattr names. Since we can't know how many are * user extended attributes, we must get all attributes here, * and have the XDR encode filter out the "user." ones. * * While this could always just allocate an XATTR_LIST_MAX * buffer, that's a waste, so do a probe + allocate. To * avoid any changes between the probe and allocate, wrap * this in inode_lock. */ __be32 nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, int *lenp) { … } /** * nfsd_removexattr - Remove an extended attribute * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of object with xattr to remove * @name: name of xattr to remove (NUL-terminated) * * Pass in a NULL pointer for delegated_inode, and let the client deal * with NFS4ERR_DELAY (same as with e.g. setattr and remove). * * Returns nfs_ok on success, or an nfsstat in network byte order. 
*/ __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) { … } __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void *buf, u32 len, u32 flags) { … } #endif /* * Check for a user's access permissions to this inode. */ __be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp, struct dentry *dentry, int acc) { … }