validation.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-or-later
/* vnode and volume validity verification.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * Data validation is managed through a number of mechanisms from the server:
 *
 *  (1) On first contact with a server (such as if it has just been rebooted),
 *      the server sends us a CB.InitCallBackState* request.
 *
 *  (2) On a RW volume, in response to certain vnode (inode)-accessing RPC
 *      calls, the server maintains a time-limited per-vnode promise that it
 *      will send us a CB.CallBack request if a third party alters the vnodes
 *      accessed.
 *
 *      Note that a vnode-level callbacks may also be sent for other reasons,
 *      such as filelock release.
 *
 *  (3) On a RO (or Backup) volume, in response to certain vnode-accessing RPC
 *      calls, each server maintains a time-limited per-volume promise that it
 *      will send us a CB.CallBack request if the RO volume is updated to a
 *      snapshot of the RW volume ("vos release").  This is an atomic event
 *      that cuts over all instances of the RO volume across multiple servers
 *      simultaneously.
 *
 *	Note that a volume-level callbacks may also be sent for other reasons,
 *	such as the volumeserver taking over control of the volume from the
 *	fileserver.
 *
 *	Note also that each server maintains an independent time limit on an
 *	independent callback.
 *
 *  (4) Certain RPC calls include a volume information record "VolSync" in
 *      their reply.  This contains a creation date for the volume that should
 *      remain unchanged for a RW volume (but will be changed if the volume is
 *      restored from backup) or will be bumped to the time of snapshotting
 *      when a RO volume is released.
 *
 * In order to track this events, the following are provided:
 *
 *	->cb_v_break.  A counter of events that might mean that the contents of
 *	a volume have been altered since we last checked a vnode.
 *
 *	->cb_v_check.  A counter of the number of events that we've sent a
 *	query to the server for.  Everything's up to date if this equals
 *	cb_v_break.
 *
 *	->cb_scrub.  A counter of the number of regression events for which we
 *	have to completely wipe the cache.
 *
 *	->cb_ro_snapshot.  A counter of the number of times that we've
 *      recognised that a RO volume has been updated.
 *
 *	->cb_break.  A counter of events that might mean that the contents of a
 *      vnode have been altered.
 *
 *	->cb_expires_at.  The time at which the callback promise expires or
 *      AFS_NO_CB_PROMISE if we have no promise.
 *
 * The way we manage things is:
 *
 *  (1) When a volume-level CB.CallBack occurs, we increment ->cb_v_break on
 *      the volume and reset ->cb_expires_at (ie. set AFS_NO_CB_PROMISE) on the
 *      volume and volume's server record.
 *
 *  (2) When a CB.InitCallBackState occurs, we treat this as a volume-level
 *	callback break on all the volumes that have been using that volume
 *	(ie. increment ->cb_v_break and reset ->cb_expires_at).
 *
 *  (3) When a vnode-level CB.CallBack occurs, we increment ->cb_break on the
 *	vnode and reset its ->cb_expires_at.  If the vnode is mmapped, we also
 *	dispatch a work item to unmap all PTEs to the vnode's pagecache to
 *	force reentry to the filesystem for revalidation.
 *
 *  (4) When entering the filesystem, we call afs_validate() to check the
 *	validity of a vnode.  This first checks to see if ->cb_v_check and
 *	->cb_v_break match, and if they don't, we lock volume->cb_check_lock
 *	exclusively and perform an FS.FetchStatus on the vnode.
 *
 *	After checking the volume, we check the vnode.  If there's a mismatch
 *	between the volume counters and the vnode's mirrors of those counters,
 *	we lock vnode->validate_lock and issue an FS.FetchStatus on the vnode.
 *
 *  (5) When the reply from FS.FetchStatus arrives, the VolSync record is
 *      parsed:
 *
 *	(A) If the Creation timestamp has changed on a RW volume or regressed
 *	    on a RO volume, we try to increment ->cb_scrub; if it advances on a
 *	    RO volume, we assume "vos release" happened and try to increment
 *	    ->cb_ro_snapshot.
 *
 *      (B) If the Update timestamp has regressed, we try to increment
 *	    ->cb_scrub.
 *
 *      Note that in both of these cases, we only do the increment if we can
 *      cmpxchg the value of the timestamp from the value we noted before the
 *      op.  This tries to prevent parallel ops from fighting one another.
 *
 *	volume->cb_v_check is then set to ->cb_v_break.
 *
 *  (6) The AFSCallBack record included in the FS.FetchStatus reply is also
 *	parsed and used to set the promise in ->cb_expires_at for the vnode,
 *	the volume and the volume's server record.
 *
 *  (7) If ->cb_scrub is seen to have advanced, we invalidate the pagecache for
 *      the vnode.
 */

/*
 * Check the validity of a vnode/inode and its parent volume.
 */
bool afs_check_validity(const struct afs_vnode *vnode)
{ … }

/*
 * See if the server we've just talked to is currently excluded.
 */
static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
{ … }

/*
 * Update the volume's server list when the creation time changes and see if
 * the server we've just talked to is currently excluded.
 */
static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
{ … }

/*
 * Handle a change to the volume creation time in the VolSync record.
 */
static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume)
{ … }

/*
 * Handle a change to the volume update time in the VolSync record.
 */
static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume)
{ … }

static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume)
{ … }

/*
 * Update the state of a volume, including recording the expiration time of the
 * callback promise.  Returns 1 to redo the operation from the start.
 */
int afs_update_volume_state(struct afs_operation *op)
{ … }

/*
 * mark the data attached to an inode as obsolete due to a write on the server
 * - might also want to ditch all the outstanding writes and dirty pages
 */
static void afs_zap_data(struct afs_vnode *vnode)
{ … }

/*
 * validate a vnode/inode
 * - there are several things we need to check
 *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
 *     symlink)
 *   - parent dir metadata changed (security changes)
 *   - dentry data changed (write, truncate)
 *   - dentry metadata changed (security changes)
 */
int afs_validate(struct afs_vnode *vnode, struct key *key)
{ … }
linux/fs/afs/validation.c