// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/fs.h> #include <linux/sort.h> #include <linux/slab.h> #include <linux/iversion.h> #include "super.h" #include "mds_client.h" #include <linux/ceph/decode.h> /* unused map expires after 5 minutes */ #define CEPH_SNAPID_MAP_TIMEOUT … /* * Snapshots in ceph are driven in large part by cooperation from the * client. In contrast to local file systems or file servers that * implement snapshots at a single point in the system, ceph's * distributed access to storage requires clients to help decide * whether a write logically occurs before or after a recently created * snapshot. * * This provides a perfect instantanous client-wide snapshot. Between * clients, however, snapshots may appear to be applied at slightly * different points in time, depending on delays in delivering the * snapshot notification. * * Snapshots are _not_ file system-wide. Instead, each snapshot * applies to the subdirectory nested beneath some directory. This * effectively divides the hierarchy into multiple "realms," where all * of the files contained by each realm share the same set of * snapshots. An individual realm's snap set contains snapshots * explicitly created on that realm, as well as any snaps in its * parent's snap set _after_ the point at which the parent became it's * parent (due to, say, a rename). Similarly, snaps from prior parents * during the time intervals during which they were the parent are included. * * The client is spared most of this detail, fortunately... it must only * maintains a hierarchy of realms reflecting the current parent/child * realm relationship, and for each realm has an explicit list of snaps * inherited from prior parents. * * A snap_realm struct is maintained for realms containing every inode * with an open cap in the system. (The needed snap realm information is * provided by the MDS whenever a cap is issued, i.e., on open.) A 'seq' * version number is used to ensure that as realm parameters change (new * snapshot, new parent, etc.) the client's realm hierarchy is updated. * * The realm hierarchy drives the generation of a 'snap context' for each * realm, which simply lists the resulting set of snaps for the realm. This * is attached to any writes sent to OSDs. */ /* * Unfortunately error handling is a bit mixed here. If we get a snap * update, but don't have enough memory to update our realm hierarchy, * it's not clear what we can do about it (besides complaining to the * console). */ /* * increase ref count for the realm * * caller must hold snap_rwsem. */ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { … } static void __insert_snap_realm(struct rb_root *root, struct ceph_snap_realm *new) { … } /* * create and get the realm rooted at @ino and bump its ref count. * * caller must hold snap_rwsem for write. */ static struct ceph_snap_realm *ceph_create_snap_realm( struct ceph_mds_client *mdsc, u64 ino) { … } /* * lookup the realm rooted at @ino. * * caller must hold snap_rwsem. */ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { … } struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { … } static void __put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm); /* * called with snap_rwsem (write) */ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { … } /* * caller holds snap_rwsem (write) */ static void __put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { … } /* * See comments in ceph_get_snap_realm. Caller needn't hold any locks. */ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { … } /* * Clean up any realms whose ref counts have dropped to zero. Note * that this does not include realms who were created but not yet * used. * * Called under snap_rwsem (write) */ static void __cleanup_empty_realms(struct ceph_mds_client *mdsc) { … } void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc) { … } /* * adjust the parent realm of a given @realm. adjust child list, and parent * pointers, and ref counts appropriately. * * return true if parent was changed, 0 if unchanged, <0 on error. * * caller must hold snap_rwsem for write. */ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, u64 parentino) { … } static int cmpu64_rev(const void *a, const void *b) { … } /* * build the snap context for a given realm. */ static int build_snap_context(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { … } /* * rebuild snap context for the given realm and all of its children. */ static void rebuild_snap_realms(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, struct list_head *dirty_realms) { … } /* * helper to allocate and decode an array of snapids. free prior * instance, if any. */ static int dup_array(u64 **dst, __le64 *src, u32 num) { … } static bool has_new_snaps(struct ceph_snap_context *o, struct ceph_snap_context *n) { … } /* * When a snapshot is applied, the size/mtime inode metadata is queued * in a ceph_cap_snap (one for each snapshot) until writeback * completes and the metadata can be flushed back to the MDS. * * However, if a (sync) write is currently in-progress when we apply * the snapshot, we have to wait until the write succeeds or fails * (and a final size/mtime is known). In this case the * cap_snap->writing = 1, and is said to be "pending." When the write * finishes, we __ceph_finish_cap_snap(). * * Caller must hold snap_rwsem for read (i.e., the realm topology won't * change). */ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap **pcapsnap) { … } /* * Finalize the size, mtime for a cap_snap.. that is, settle on final values * to be used for the snapshot, to be flushed back to the mds. * * If capsnap can now be flushed, add to snap_flush list, and return 1. * * Caller must hold i_ceph_lock. */ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { … } /* * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. */ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { … } /* * Parse and apply a snapblob "snap trace" from the MDS. This specifies * the snap realm parameters from a given realm and all of its ancestors, * up to the root. * * Caller must hold snap_rwsem for write. */ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, void *p, void *e, bool deletion, struct ceph_snap_realm **realm_ret) { … } /* * Send any cap_snaps that are queued for flush. Try to carry * s_mutex across multiple snap flushes to avoid locking overhead. * * Caller holds no locks. */ static void flush_snaps(struct ceph_mds_client *mdsc) { … } /** * ceph_change_snap_realm - change the snap_realm for an inode * @inode: inode to move to new snap realm * @realm: new realm to move inode into (may be NULL) * * Detach an inode from its old snaprealm (if any) and attach it to * the new snaprealm (if any). The old snap realm reference held by * the inode is put. If realm is non-NULL, then the caller's reference * to it is taken over by the inode. */ void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm) { … } /* * Handle a snap notification from the MDS. * * This can take two basic forms: the simplest is just a snap creation * or deletion notification on an existing realm. This should update the * realm and its children. * * The more difficult case is realm creation, due to snap creation at a * new point in the file hierarchy, or due to a rename that moves a file or * directory into another realm. */ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { … } struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap) { … } void ceph_put_snapid_map(struct ceph_mds_client* mdsc, struct ceph_snapid_map *sm) { … } void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) { … } void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) { … }