// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <[email protected]>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data. Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols. Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache. A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache. The client does not
 * correct unaligned requests from applications. All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files. Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001	Initial implementation for 2.4  --cel
 * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003	Port to 2.5 APIs  --cel
 * 31 Mar 2004	Handle direct I/O without VFS support  --cel
 * 15 Sep 2004	Parallel async reads  --cel
 * 04 May 2005	support O_DIRECT with aio  --cel
 *
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/module.h>

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>

#include <linux/uaccess.h>
#include <linux/atomic.h>

#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
#include "fscache.h"
#include "nfstrace.h"

#define NFSDBG_FACILITY		…

static struct kmem_cache *nfs_direct_cachep;

static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
static void nfs_direct_write_schedule_work(struct work_struct *work);

static inline void get_dreq(struct nfs_direct_req *dreq)
{
	…
}

static inline int put_dreq(struct nfs_direct_req *dreq)
{
	…
}

static void
nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
			    const struct nfs_pgio_header *hdr,
			    ssize_t dreq_len)
{
	…
}

static void
nfs_direct_count_bytes(struct nfs_direct_req *dreq,
		       const struct nfs_pgio_header *hdr)
{
	…
}

static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
					struct nfs_page *req)
{
	…
}
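/*
 * Illustrative sketch (not part of this file): how an application
 * typically requests the uncached I/O described in the header comment
 * above. The file is opened with O_DIRECT, and the buffer, offset, and
 * length are kept block-aligned, since this client does not correct
 * unaligned requests. The path and the 4096-byte alignment below are
 * made up for the example.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	int example_direct_read(void)
 *	{
 *		void *buf;
 *		ssize_t n;
 *		// Bypass the page cache; reads go straight to the server.
 *		int fd = open("/mnt/nfs/data.bin", O_RDONLY | O_DIRECT);
 *
 *		if (fd < 0)
 *			return -1;
 *		// O_DIRECT generally requires an aligned buffer and an
 *		// aligned length/offset.
 *		if (posix_memalign(&buf, 4096, 4096)) {
 *			close(fd);
 *			return -1;
 *		}
 *		n = pread(fd, buf, 4096, 0);
 *		free(buf);
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 */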
/**
 * nfs_swap_rw - NFS address space operation for swap I/O
 * @iocb: target I/O control block
 * @iter: I/O buffer
 *
 * Perform IO to the swap-file. This is much like direct IO.
 */
int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
	…
}

static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
	…
}

void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
			      struct nfs_direct_req *dreq)
{
	…
}

static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
	…
}

static void nfs_direct_req_free(struct kref *kref)
{
	…
}

static void nfs_direct_req_release(struct nfs_direct_req *dreq)
{
	…
}

ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
{
	…
}
EXPORT_SYMBOL_GPL(…);

/*
 * Collects and returns the final error value/byte-count.
 */
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
	…
}

/*
 * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
 * the iocb is still valid here if this is a synchronous request.
 */
static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
	…
}

static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
	…
}

static void nfs_read_sync_pgio_error(struct list_head *head, int error)
{
	…
}

static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
	…
}

static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = …;

/*
 * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
 * operation. If nfs_readdata_alloc() or get_user_pages() fails,
 * bail and stop sending more reads. Read length accounting is
 * handled automatically by nfs_direct_read_result(). Otherwise, if
 * no requests have been sent, just return an error.
 */
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
					      struct iov_iter *iter,
					      loff_t pos)
{
	…
}
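/*
 * Illustrative sketch (not part of this file) of the chunk-and-dispatch
 * pattern the comment above describes: the request is split into
 * rsize-sized pieces, one READ is issued per piece, and scheduling stops
 * at the first failure. The helper dispatch_read() and the types below
 * are hypothetical; the real code drives a struct nfs_pageio_descriptor
 * and tracks outstanding I/O via the dreq reference count.
 *
 *	// Hypothetical: issue one READ covering [pos, pos + len).
 *	// Returns 0 on success or a negative errno.
 *	int dispatch_read(long long pos, unsigned long len);
 *
 *	long long schedule_chunked_reads(long long pos, unsigned long count,
 *					 unsigned long rsize)
 *	{
 *		long long requested = 0;
 *
 *		while (count > 0) {
 *			unsigned long len = count < rsize ? count : rsize;
 *			int err = dispatch_read(pos, len);
 *
 *			// Bail on the first failure; if nothing was sent,
 *			// return the error, otherwise report what was sent.
 *			if (err)
 *				return requested ? requested : err;
 *			requested += len;
 *			pos += len;
 *			count -= len;
 *		}
 *		return requested;
 *	}
 */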
/**
 * nfs_file_direct_read - file direct read operation for NFS files
 * @iocb: target I/O control block
 * @iter: vector of user buffers into which to read data
 * @swap: flag indicating this is swap IO, not O_DIRECT IO
 *
 * We use this function for direct reads instead of calling
 * generic_file_aio_read() in order to avoid gfar's check to see if
 * the request starts before the end of the file. For that check
 * to work, we must generate a GETATTR before each direct read, and
 * even then there is a window between the GETATTR and the subsequent
 * READ where the file size could change. Our preference is simply
 * to do all reads the application wants, and the server will take
 * care of managing the end of file boundary.
 *
 * This function also eliminates unnecessarily updating the file's
 * atime locally, as the NFS server sets the file's atime, and this
 * client must read the updated atime from the server back into its
 * cache.
 */
ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
			     bool swap)
{
	…
}

static void nfs_direct_add_page_head(struct list_head *list,
				     struct nfs_page *req)
{
	…
}

static void nfs_direct_join_group(struct list_head *list,
				  struct nfs_commit_info *cinfo,
				  struct inode *inode)
{
	…
}

static void nfs_direct_write_scan_commit_list(struct inode *inode,
					      struct list_head *list,
					      struct nfs_commit_info *cinfo)
{
	…
}

static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
	…
}

static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
	…
}

static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
				     struct nfs_page *req)
{
	…
}

static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = …;

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
	…
}

static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
{
	…
}

static void nfs_direct_write_schedule_work(struct work_struct *work)
{
	…
}

static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
{
	…
}

static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
	…
}

static void nfs_write_sync_pgio_error(struct list_head *head, int error)
{
	…
}

static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	…
}

static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = …;

/*
 * NB: Return the value of the first error return code. Subsequent
 * errors after the first one are ignored.
 */
/*
 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
 * bail and stop sending more writes. Write length accounting is
 * handled automatically by nfs_direct_write_result(). Otherwise, if
 * no requests have been sent, just return an error.
 */
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
					       struct iov_iter *iter,
					       loff_t pos, int ioflags)
{
	…
}

/**
 * nfs_file_direct_write - file direct write operation for NFS files
 * @iocb: target I/O control block
 * @iter: vector of user buffers from which to write data
 * @swap: flag indicating this is swap IO, not O_DIRECT IO
 *
 * We use this function for direct writes instead of calling
 * generic_file_aio_write() in order to avoid taking the inode
 * semaphore and updating the i_size. The NFS server will set
 * the new i_size and this client must read the updated size
 * back into its cache. We let the server do generic write
 * parameter checking and report problems.
 *
 * We eliminate local atime updates, see direct read above.
 *
 * We avoid unnecessary page cache invalidations for normal cached
 * readers of this file.
 *
 * Note that O_APPEND is not supported for NFS direct writes, as there
 * is no atomic O_APPEND write facility in the NFS protocol.
 */
ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
			      bool swap)
{
	…
}

/**
 * nfs_init_directcache - create a slab cache for nfs_direct_req structures
 *
 */
int __init nfs_init_directcache(void)
{
	…
}

/**
 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
 *
 */
void nfs_destroy_directcache(void)
{
	…
}
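/*
 * Illustrative sketch (not part of this file) of the slab-cache
 * lifecycle that nfs_init_directcache()/nfs_destroy_directcache()
 * manage for nfs_direct_req structures. The struct name, cache name,
 * and flags below are placeholders; the real cache is the static
 * nfs_direct_cachep declared near the top of this file, and its exact
 * creation flags may differ.
 *
 *	#include <linux/errno.h>
 *	#include <linux/slab.h>
 *
 *	struct example_req {
 *		int state;
 *	};
 *
 *	static struct kmem_cache *example_cachep;
 *
 *	static int __init example_cache_init(void)
 *	{
 *		// One slab cache sized for the request structure.
 *		example_cachep = kmem_cache_create("example_cache",
 *						   sizeof(struct example_req),
 *						   0, 0, NULL);
 *		return example_cachep ? 0 : -ENOMEM;
 *	}
 *
 *	static void example_cache_exit(void)
 *	{
 *		// All objects must be freed back to the cache before
 *		// it is destroyed.
 *		kmem_cache_destroy(example_cachep);
 *	}
 *
 *	// Per-request allocation and release:
 *	//	req = kmem_cache_zalloc(example_cachep, GFP_KERNEL);
 *	//	...
 *	//	kmem_cache_free(example_cachep, req);
 */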