// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ /* * This file implements VFS file and inode operations for regular files, device * nodes and symlinks as well as address space operations. * * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if * the page is dirty and is used for optimization purposes - dirty pages are * not budgeted so the flag shows that 'ubifs_write_end()' should not release * the budget for this page. The @PG_checked flag is set if full budgeting is * required for the page e.g., when it corresponds to a file hole or it is * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because * it is OK to fail in this function, and the budget is released in * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry * information about how the page was budgeted, to make it possible to release * the budget properly. * * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we * implement. However, this is not true for 'ubifs_writepage()', which may be * called with @i_mutex unlocked. For example, when the flusher thread is doing * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. * In "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. * in the "sys_write -> alloc_pages -> direct reclaim path". So, in * 'ubifs_writepage()' we are only guaranteed that the page is locked. * * Similarly, @i_mutex is not always locked in 'ubifs_read_folio()', e.g., the * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> * ondemand_readahead -> read_folio"). In case of readahead, the @I_SYNC flag is * not set either. However, UBIFS disables readahead. 
*/ #include "ubifs.h" #include <linux/mount.h> #include <linux/slab.h> #include <linux/migrate.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) { … } static int do_readpage(struct folio *folio) { … } /** * release_new_page_budget - release budget of a new page. * @c: UBIFS file-system description object * * This is a helper function which releases budget corresponding to the budget * of one new page of data. */ static void release_new_page_budget(struct ubifs_info *c) { … } /** * release_existing_page_budget - release budget of an existing page. * @c: UBIFS file-system description object * * This is a helper function which releases budget corresponding to the budget * of changing one page of data which already exists on the flash media. */ static void release_existing_page_budget(struct ubifs_info *c) { … } /* * NOTE(review): presumably the slow path of 'ubifs_write_begin()', i.e. the * path which does not assume that the budget can be acquired without forcing * write-back (see the comment above 'ubifs_write_begin()'); confirm against * the function body. */ static int write_begin_slow(struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep) { … } /** * allocate_budget - allocate budget for 'ubifs_write_begin()'. * @c: UBIFS file-system description object * @folio: folio to allocate budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for 'ubifs_write_begin()' which allocates budget * for the operation. The budget is allocated differently depending on whether * this is appending, whether the page is dirty or not, and so on. This * function leaves the @ui->ui_mutex locked in case of appending. * * Returns: %0 in case of success and %-ENOSPC in case of failure. */ static int allocate_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { … } /* * This function is called when a page of data is going to be written. Since * the page of data will not necessarily go to the flash straight away, UBIFS * has to reserve space on the media for it, which is done by means of * budgeting. 
* * This is the hot-path of the file-system and we are trying to optimize it as * much as possible. For this reason it is split into 2 parts - slow and fast. * * There are many budgeting cases: * o a new page is appended - we have to budget for a new page and for * changing the inode; however, if the inode is already dirty, there is * no need to budget for it; * o an existing clean page is changed - we have to budget for it; if the page * does not exist on the media (a hole), we have to budget for a new * page; otherwise, we may budget for changing an existing page; the * difference between these cases is that changing an existing page does * not introduce anything new to the FS indexing information, so it does * not grow, and a smaller budget is acquired in this case; * o an existing dirty page is changed - no need to budget at all, because * the page budget has been acquired earlier, when the page has been * marked dirty. * * The UBIFS budgeting sub-system may force write-back if it thinks there is no * space to reserve. This imposes some locking restrictions and makes it * impossible to take into account the above cases, and makes it impossible to * optimize budgeting. * * The solution for this is that the fast path of 'ubifs_write_begin()' assumes * there is plenty of flash space and the budget will be acquired quickly, * without forcing write-back. The slow path does not make this assumption. */ static int ubifs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { … } /** * cancel_budget - cancel budget. * @c: UBIFS file-system description object * @folio: folio to cancel budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for a page write operation. It unlocks the * @ui->ui_mutex in case of appending. 
*/ static void cancel_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { … } static int ubifs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { … } /** * populate_page - copy data nodes into a page for bulk-read. * @c: UBIFS file-system description object * @folio: folio * @bu: bulk-read information * @n: next zbranch slot * * Returns: %0 on success and a negative error code on failure. */ static int populate_page(struct ubifs_info *c, struct folio *folio, struct bu_info *bu, int *n) { … } /** * ubifs_do_bulk_read - do bulk-read. * @c: UBIFS file-system description object * @bu: bulk-read information * @folio1: first folio to read * * Returns: %1 if the bulk-read is done, otherwise %0 is returned. */ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, struct folio *folio1) { … } /** * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. * @folio: folio from which to start bulk-read. * * Some flash media are capable of reading sequentially at faster rates. UBIFS * bulk-read facility is designed to take advantage of that, by reading in one * go consecutive data nodes that are also located consecutively in the same * LEB. * * Returns: %1 if a bulk-read is done and %0 otherwise. */ static int ubifs_bulk_read(struct folio *folio) { … } static int ubifs_read_folio(struct file *file, struct folio *folio) { … } static int do_writepage(struct folio *folio, size_t len) { … } /* * When writing-back dirty inodes, VFS first writes-back pages belonging to the * inode, then the inode itself. For UBIFS this may cause a problem. Consider a * situation when we have an inode with size 0, then a megabyte of data is * appended to the inode, then write-back starts and flushes some amount of the * dirty pages, the journal becomes full, commit happens and finishes, and then * an unclean reboot happens. 
When the file system is mounted next time, the * inode size would still be 0, but there would be many pages which are beyond * the inode size, they would be indexed and consume flash space. Because the * journal has been committed, the replay would not be able to detect this * situation and correct the inode size. This means UBIFS would have to scan * whole index and correct all inode sizes, which is long and unacceptable. * * To prevent situations like this, UBIFS writes pages back only if they are * within the last synchronized inode size, i.e. the size which has been * written to the flash media last time. Otherwise, UBIFS forces inode * write-back, thus making sure the on-flash inode contains current inode size, * and then keeps writing pages back. * * Some locking issues explanation. 'ubifs_writepage()' first is called with * the page locked, and it locks @ui_mutex. However, write-back does not take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where * we have to call 'truncate_setsize()', which first changes @inode->i_size, * then drops the truncated pages. And while dropping the pages, it takes the * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. * This means that @inode->i_size is changed while @ui_mutex is unlocked. * * XXX(truncate): with the new truncate sequence this is not true anymore, * and the calls to truncate_setsize can be moved around freely. They should * be moved to the very end of the truncate sequence. * * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond * inode size. How do we do this if @inode->i_size may become smaller while we * are in the middle of 'ubifs_writepage()'? 
The UBIFS solution is the * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size * internally and updates it under @ui_mutex. * * Q: why do we not worry that if we race with truncation, we may end up with a * situation when the inode is truncated while we are in the middle of * 'do_writepage()', so we do write beyond inode size? * A: If we are in the middle of 'do_writepage()', truncation would be locked * on the page lock and it would not write the truncated inode node to the * journal before we have finished. */ static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc, void *data) { … } static int ubifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { … } /** * do_attr_changes - change inode attributes. * @inode: inode to change attributes for * @attr: describes attributes to change */ static void do_attr_changes(struct inode *inode, const struct iattr *attr) { … } /** * do_truncation - truncate an inode. * @c: UBIFS file-system description object * @inode: inode to truncate * @attr: inode attribute changes description * * This function implements VFS '->setattr()' call when the inode is truncated * to a smaller size. * * Returns: %0 in case of success and a negative error code * in case of failure. */ static int do_truncation(struct ubifs_info *c, struct inode *inode, const struct iattr *attr) { … } /** * do_setattr - change inode attributes. * @c: UBIFS file-system description object * @inode: inode to change attributes for * @attr: inode attribute changes description * * This function implements VFS '->setattr()' call for all cases except * truncations to smaller size. * * Returns: %0 in case of success and a negative * error code in case of failure. 
*/ static int do_setattr(struct ubifs_info *c, struct inode *inode, const struct iattr *attr) { … } /* * NOTE(review): presumably the VFS '->setattr()' entry point, which uses * 'do_truncation()' for truncations to a smaller size and 'do_setattr()' for * all other cases, as the descriptions of those helpers suggest; confirm * against the function body. */ int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { … } static void ubifs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { … } int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { … } /** * mctime_update_needed - check if mtime or ctime update is needed. * @inode: the inode to do the check for * @now: current time * * This helper function checks if the inode mtime/ctime should be updated or * not. If current values of the time-stamps are within the UBIFS inode time * granularity, they are not updated. This is an optimization. * * Returns: %1 if time update is needed, %0 if not */ static inline int mctime_update_needed(const struct inode *inode, const struct timespec64 *now) { … } /** * ubifs_update_time - update time of inode. * @inode: inode to update * @flags: time-updating control flags which determine which time fields of * @inode are updated * * This function updates time of the inode. * * Returns: %0 for success or a negative error code otherwise. */ int ubifs_update_time(struct inode *inode, int flags) { … } /** * update_mctime - update mtime and ctime of an inode. * @inode: inode to update * * This function updates mtime and ctime of the inode if it is not equivalent to * current time. * * Returns: %0 in case of success and a negative error code in * case of failure. */ static int update_mctime(struct inode *inode) { … } static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) { … } static bool ubifs_dirty_folio(struct address_space *mapping, struct folio *folio) { … } static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) { … } /* * mmap()d file has taken write protection fault and is being made writable. * UBIFS must ensure page is budgeted for. 
*/ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) { … } /* * NOTE(review): presumably this table wires 'ubifs_vm_page_mkwrite()' in as * the '->page_mkwrite()' handler and is installed on the VMA by * 'ubifs_file_mmap()' below; confirm against the initializer and the function * body. */ static const struct vm_operations_struct ubifs_file_vm_ops = …; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) { … } static const char *ubifs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { … } static int ubifs_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { … } const struct address_space_operations ubifs_file_address_operations = …; const struct inode_operations ubifs_file_inode_operations = …; const struct inode_operations ubifs_symlink_inode_operations = …; const struct file_operations ubifs_file_operations = …;