linux/fs/ceph/caps.c

// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/iversion.h>
#include <linux/filelock.h>
#include <linux/jiffies.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"
#include "crypto.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h>

/*
 * Capability management
 *
 * The Ceph metadata servers control client access to inode metadata
 * and file data by issuing capabilities, granting clients permission
 * to read and/or write both inode field and file data to OSDs
 * (storage nodes).  Each capability consists of a set of bits
 * indicating which operations are allowed.
 *
 * If the client holds a *_SHARED cap, the client has a coherent value
 * that can be safely read from the cached inode.
 *
 * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the
 * client is allowed to change inode attributes (e.g., file size,
 * mtime), note its dirty state in the ceph_cap, and asynchronously
 * flush that metadata change to the MDS.
 *
 * In the event of a conflicting operation (perhaps by another
 * client), the MDS will revoke the conflicting client capabilities.
 *
 * In order for a client to cache an inode, it must hold a capability
 * with at least one MDS server.  When inodes are released, release
 * notifications are batched and periodically sent en masse to the MDS
 * cluster to release server state.
 */

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc);
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid);

/*
 * Generate readable cap strings for debugging output.
 */
#define MAX_CAP_STR
static char cap_str[MAX_CAP_STR][40];
static DEFINE_SPINLOCK(cap_str_lock);
static int last_cap_str;

static char *gcap_string(char *s, int c)
{}

const char *ceph_cap_string(int caps)
{}

void ceph_caps_init(struct ceph_mds_client *mdsc)
{}

void ceph_caps_finalize(struct ceph_mds_client *mdsc)
{}

void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
			      struct ceph_mount_options *fsopt)
{}

static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
{}

/*
 * Called under mdsc->mutex.
 */
int ceph_reserve_caps(struct ceph_mds_client *mdsc,
		      struct ceph_cap_reservation *ctx, int need)
{}

void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
			 struct ceph_cap_reservation *ctx)
{}

struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
			      struct ceph_cap_reservation *ctx)
{}

void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
{}

void ceph_reservation_status(struct ceph_fs_client *fsc,
			     int *total, int *avail, int *used, int *reserved,
			     int *min)
{}

/*
 * Find ceph_cap for given mds, if any.
 *
 * Called with i_ceph_lock held.
 */
struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{}

struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{}

/*
 * Called under i_ceph_lock.
 */
static void __insert_cap_node(struct ceph_inode_info *ci,
			      struct ceph_cap *new)
{}

/*
 * (re)set cap hold timeouts, which control the delayed release
 * of unused caps back to the MDS.  Should be called on cap use.
 */
static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
{}

/*
 * (Re)queue cap at the end of the delayed cap release list.
 *
 * If I_FLUSH is set, leave the inode at the front of the list.
 *
 * Caller holds i_ceph_lock
 *    -> we take mdsc->cap_delay_lock
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				struct ceph_inode_info *ci)
{}

/*
 * Queue an inode for immediate writeback.  Mark inode with I_FLUSH,
 * indicating we should send a cap message to flush dirty metadata
 * asap, and move to the front of the delayed cap list.
 */
static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
				      struct ceph_inode_info *ci)
{}

/*
 * Cancel delayed work on cap.
 *
 * Caller must hold i_ceph_lock.
 */
static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
{}

/* Common issue checks for add_cap, handle_cap_grant. */
static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
			      unsigned issued)
{}

/**
 * change_auth_cap_ses - move inode to appropriate lists when auth caps change
 * @ci: inode to be moved
 * @session: new auth caps session
 */
void change_auth_cap_ses(struct ceph_inode_info *ci,
			 struct ceph_mds_session *session)
{}

/*
 * Add a capability under the given MDS session.
 *
 * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock
 *
 * @fmode is the open file mode, if we are opening a file, otherwise
 * it is < 0.  (This is so we can atomically add the cap and add an
 * open file reference to it.)
 */
void ceph_add_cap(struct inode *inode,
		  struct ceph_mds_session *session, u64 cap_id,
		  unsigned issued, unsigned wanted,
		  unsigned seq, unsigned mseq, u64 realmino, int flags,
		  struct ceph_cap **new_cap)
{}

/*
 * Return true if cap has not timed out and belongs to the current
 * generation of the MDS session (i.e. has not gone 'stale' due to
 * us losing touch with the mds).
 */
static int __cap_is_valid(struct ceph_cap *cap)
{}

/*
 * Return set of valid cap bits issued to us.  Note that caps time
 * out, and may be invalidated in bulk if the client session times out
 * and session->s_cap_gen is bumped.
 */
int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
{}

/*
 * Get cap bits issued by caps other than @ocap
 */
int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
{}

/*
 * Move a cap to the end of the LRU (oldest caps at list head, newest
 * at list tail).
 */
static void __touch_cap(struct ceph_cap *cap)
{}

/*
 * Check if we hold the given mask.  If so, move the cap(s) to the
 * front of their respective LRUs.  (This is the preferred way for
 * callers to check for caps they want.)
 */
int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
{}

int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
				   int touch)
{}

/*
 * Return true if mask caps are currently being revoked by an MDS.
 */
int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
			       struct ceph_cap *ocap, int mask)
{}

int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
{}

int __ceph_caps_used(struct ceph_inode_info *ci)
{}

#define FMODE_WAIT_BIAS

/*
 * wanted, by virtue of open file modes
 */
int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
{}

/*
 * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
 */
int __ceph_caps_wanted(struct ceph_inode_info *ci)
{}

/*
 * Return caps we have registered with the MDS(s) as 'wanted'.
 */
int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
{}

int ceph_is_any_caps(struct inode *inode)
{}

/*
 * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
 *
 * caller should hold i_ceph_lock.
 * caller will not hold session s_mutex if called from destroy_inode.
 */
void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{}

void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
		     bool queue_release)
{}

struct cap_msg_args {};

/* Marshal up the cap msg to the MDS */
static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
{}

/*
 * Queue cap releases when an inode is dropped from our cache.
 */
void __ceph_remove_caps(struct ceph_inode_info *ci)
{}

/*
 * Prepare to send a cap message to an MDS. Update the cap state, and populate
 * the arg struct with the parameters that will need to be sent. This should
 * be done under the i_ceph_lock to guard against changes to cap state.
 *
 * Make note of max_size reported/requested from mds, revoked caps
 * that have now been implemented.
 */
static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
		       int op, int flags, int used, int want, int retain,
		       int flushing, u64 flush_tid, u64 oldest_flush_tid)
{}

#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
#define CAP_MSG_FIXED_FIELDS

static inline int cap_msg_size(struct cap_msg_args *arg)
{}
#else
#define CAP_MSG_FIXED_FIELDS

static inline int cap_msg_size(struct cap_msg_args *arg)
{
	return CAP_MSG_FIXED_FIELDS;
}
#endif /* CONFIG_FS_ENCRYPTION */

/*
 * Send a cap msg on the given inode.
 *
 * Caller should hold snap_rwsem (read), s_mutex.
 */
static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
{}

static inline int __send_flush_snap(struct inode *inode,
				    struct ceph_mds_session *session,
				    struct ceph_cap_snap *capsnap,
				    u32 mseq, u64 oldest_flush_tid)
{}

/*
 * When a snapshot is taken, clients accumulate dirty metadata on
 * inodes with capabilities in ceph_cap_snaps to describe the file
 * state at the time the snapshot was taken.  This must be flushed
 * asynchronously back to the MDS once sync writes complete and dirty
 * data is written out.
 *
 * Called under i_ceph_lock.
 */
static void __ceph_flush_snaps(struct ceph_inode_info *ci,
			       struct ceph_mds_session *session)
		__releases(ci->i_ceph_lock)
		__acquires(ci->i_ceph_lock)
{}

void ceph_flush_snaps(struct ceph_inode_info *ci,
		      struct ceph_mds_session **psession)
{}

/*
 * Mark caps dirty.  If inode is newly dirty, return the dirty flags.
 * Caller is then responsible for calling __mark_inode_dirty with the
 * returned flags value.
 */
int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
			   struct ceph_cap_flush **pcf)
{}

struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{}

void ceph_free_cap_flush(struct ceph_cap_flush *cf)
{}

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
{}

/*
 * Remove cap_flush from the mdsc's or inode's flushing cap list.
 * Return true if caller needs to wake up flush waiters.
 */
static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
					 struct ceph_cap_flush *cf)
{}

static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
				       struct ceph_cap_flush *cf)
{}

/*
 * Add dirty inode to the flushing list.  Assigned a seq number so we
 * can wait for caps to flush without starving.
 *
 * Called under i_ceph_lock. Returns the flush tid.
 */
static u64 __mark_caps_flushing(struct inode *inode,
				struct ceph_mds_session *session, bool wake,
				u64 *oldest_flush_tid)
{}

/*
 * try to invalidate mapping pages without blocking.
 */
static int try_nonblocking_invalidate(struct inode *inode)
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
{}

bool __ceph_should_report_size(struct ceph_inode_info *ci)
{}

/*
 * Swiss army knife function to examine currently used and wanted
 * versus held caps.  Release, flush, ack revoked caps to mds as
 * appropriate.
 *
 *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
 *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
 *    further delay.
 *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
 *    further delay.
 */
void ceph_check_caps(struct ceph_inode_info *ci, int flags)
{}

/*
 * Try to flush dirty caps back to the auth mds.
 */
static int try_flush_caps(struct inode *inode, u64 *ptid)
{}

/*
 * Return true if we've flushed caps through the given flush_tid.
 */
static int caps_are_flushed(struct inode *inode, u64 flush_tid)
{}

/*
 * flush the mdlog and wait for any unsafe requests to complete.
 */
static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
{}

int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{}

/*
 * Flush any dirty caps back to the mds.  If we aren't asked to wait,
 * queue inode for flush but don't do so immediately, because we can
 * get by with fewer MDS messages if we wait for data writeback to
 * complete first.
 */
int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
{}

static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid)
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
{}

void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
				   struct ceph_mds_session *session)
{}

void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
			     struct ceph_mds_session *session)
{}

void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
				   struct ceph_inode_info *ci)
{}


/*
 * Take references to capabilities we hold, so that we don't release
 * them to the MDS prematurely.
 */
void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
			    bool snap_rwsem_locked)
{}

/*
 * Try to grab cap references.  Specify those refs we @want, and the
 * minimal set we @need.  Also include the larger offset we are writing
 * to (when applicable), and check against max_size here as well.
 * Note that caller is responsible for ensuring max_size increases are
 * requested from the MDS.
 *
 * Returns 0 if caps were not able to be acquired (yet), 1 if succeed,
 * or a negative error code. There are 3 speical error codes:
 *  -EAGAIN:  need to sleep but non-blocking is specified
 *  -EFBIG:   ask caller to call check_max_size() and try again.
 *  -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
 */
enum {};

static int try_get_cap_refs(struct inode *inode, int need, int want,
			    loff_t endoff, int flags, int *got)
{}

/*
 * Check the offset we are writing up to against our current
 * max_size.  If necessary, tell the MDS we want to write to
 * a larger offset.
 */
static void check_max_size(struct inode *inode, loff_t endoff)
{}

static inline int get_used_fmode(int caps)
{}

int ceph_try_get_caps(struct inode *inode, int need, int want,
		      bool nonblock, int *got)
{}

/*
 * Wait for caps, and take cap references.  If we can't get a WR cap
 * due to a small max_size, make sure we check_max_size (and possibly
 * ask the mds) so we don't get hung up indefinitely.
 */
int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
		    int want, loff_t endoff, int *got)
{}

int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
		  int *got)
{}

/*
 * Take cap refs.  Caller must already know we hold at least one ref
 * on the caps in question or we don't know this is safe.
 */
void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
{}


/*
 * drop cap_snap that is not associated with any snapshot.
 * we don't need to send FLUSHSNAP message for it.
 */
static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
				  struct ceph_cap_snap *capsnap)
{}

enum put_cap_refs_mode {};

/*
 * Release cap refs.
 *
 * If we released the last ref on any given cap, call ceph_check_caps
 * to release (or schedule a release).
 *
 * If we are releasing a WR cap (from a sync write), finalize any affected
 * cap_snap, and wake up any waiters.
 */
static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
				enum put_cap_refs_mode mode)
{}

void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
{}

void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
{}

/*
 * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
 * context.  Adjust per-snap dirty page accounting as appropriate.
 * Once all dirty data for a cap_snap is flushed, flush snapped file
 * metadata back to the MDS.  If we dropped the last ref, call
 * ceph_check_caps.
 */
void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
				struct ceph_snap_context *snapc)
{}

/*
 * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
 */
static void invalidate_aliases(struct inode *inode)
{}

struct cap_extra_info {};

/*
 * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
 * actually be a revocation if it specifies a smaller cap set.)
 *
 * caller holds s_mutex and i_ceph_lock, we drop both.
 */
static void handle_cap_grant(struct inode *inode,
			     struct ceph_mds_session *session,
			     struct ceph_cap *cap,
			     struct ceph_mds_caps *grant,
			     struct ceph_buffer *xattr_buf,
			     struct cap_extra_info *extra_info)
	__releases(ci->i_ceph_lock)
	__releases(session->s_mdsc->snap_rwsem)
{}

/*
 * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
 * MDS has been safely committed.
 */
static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
				 struct ceph_mds_caps *m,
				 struct ceph_mds_session *session,
				 struct ceph_cap *cap)
	__releases(ci->i_ceph_lock)
{}

void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
			   bool *wake_ci, bool *wake_mdsc)
{}

void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
			 bool *wake_ci, bool *wake_mdsc)
{}

/*
 * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
 * throw away our cap_snap.
 *
 * Caller hold s_mutex.
 */
static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
				     struct ceph_mds_caps *m,
				     struct ceph_mds_session *session)
{}

/*
 * Handle TRUNC from MDS, indicating file truncation.
 *
 * caller hold s_mutex.
 */
static bool handle_cap_trunc(struct inode *inode,
			     struct ceph_mds_caps *trunc,
			     struct ceph_mds_session *session,
			     struct cap_extra_info *extra_info)
{}

/*
 * Handle EXPORT from MDS.  Cap is being migrated _from_ this mds to a
 * different one.  If we are the most recent migration we've seen (as
 * indicated by mseq), make note of the migrating cap bits for the
 * duration (until we see the corresponding IMPORT).
 *
 * caller holds s_mutex
 */
static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
			      struct ceph_mds_cap_peer *ph,
			      struct ceph_mds_session *session)
{}

/*
 * Handle cap IMPORT.
 *
 * caller holds s_mutex. acquires i_ceph_lock
 */
static void handle_cap_import(struct ceph_mds_client *mdsc,
			      struct inode *inode, struct ceph_mds_caps *im,
			      struct ceph_mds_cap_peer *ph,
			      struct ceph_mds_session *session,
			      struct ceph_cap **target_cap, int *old_issued)
{}

#ifdef CONFIG_FS_ENCRYPTION
static int parse_fscrypt_fields(void **p, void *end,
				struct cap_extra_info *extra)
{}
#else
static int parse_fscrypt_fields(void **p, void *end,
				struct cap_extra_info *extra)
{
	u32 len;

	/* Don't care about these fields unless we're encryption-capable */
	ceph_decode_32_safe(p, end, len, bad);
	if (len)
		ceph_decode_skip_n(p, end, len, bad);
	ceph_decode_32_safe(p, end, len, bad);
	if (len)
		ceph_decode_skip_n(p, end, len, bad);
	return 0;
bad:
	return -EIO;
}
#endif

/*
 * Handle a caps message from the MDS.
 *
 * Identify the appropriate session, inode, and call the right handler
 * based on the cap op.
 */
void ceph_handle_caps(struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{}

/*
 * Delayed work handler to process end of delayed cap release LRU list.
 *
 * If new caps are added to the list while processing it, these won't get
 * processed in this run.  In this case, the ci->i_hold_caps_max will be
 * returned so that the work can be scheduled accordingly.
 */
unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{}

/*
 * Flush all dirty caps to the mds
 */
static void flush_dirty_session_caps(struct ceph_mds_session *s)
{}

void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{}

/*
 * Flush all cap releases to the mds
 */
static void flush_cap_releases(struct ceph_mds_session *s)
{}

void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
{}

void __ceph_touch_fmode(struct ceph_inode_info *ci,
			struct ceph_mds_client *mdsc, int fmode)
{}

void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
{}

/*
 * Drop open file reference.  If we were the last open file,
 * we may need to release capabilities to the MDS (or schedule
 * their delayed release).
 */
void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
{}

/*
 * For a soon-to-be unlinked file, drop the LINK caps. If it
 * looks like the link count will hit 0, drop any other caps (other
 * than PIN) we don't specifically want (due to the file still being
 * open).
 */
int ceph_drop_caps_for_unlink(struct inode *inode)
{}

/*
 * Helpers for embedding cap and dentry lease releases into mds
 * requests.
 *
 * @force is used by dentry_release (below) to force inclusion of a
 * record for the directory inode, even when there aren't any caps to
 * drop.
 */
int ceph_encode_inode_release(void **p, struct inode *inode,
			      int mds, int drop, int unless, int force)
{}

/**
 * ceph_encode_dentry_release - encode a dentry release into an outgoing request
 * @p: outgoing request buffer
 * @dentry: dentry to release
 * @dir: dir to release it from
 * @mds: mds that we're speaking to
 * @drop: caps being dropped
 * @unless: unless we have these caps
 *
 * Encode a dentry release into an outgoing request buffer. Returns 1 if the
 * thing was released, or a negative error code otherwise.
 */
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
			       struct inode *dir,
			       int mds, int drop, int unless)
{}

static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
{}

int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
{}