linux/fs/bcachefs/btree_update.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_UPDATE_H
#define _BCACHEFS_BTREE_UPDATE_H

#include "btree_iter.h"
#include "journal.h"

struct bch_fs;
struct btree;

void bch2_btree_node_prep_for_write(struct btree_trans *,
				    struct btree_path *, struct btree *);
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
				struct btree *, struct btree_node_iter *,
				struct bkey_i *);

int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);

void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
				struct bkey_i *, u64);

#define BCH_TRANS_COMMIT_FLAGS()							\
	x(no_enospc,	"don't check for enospc")					\
	x(no_check_rw,	"don't attempt to take a ref on c->writes")			\
	x(lazy_rw,	"go read-write if we haven't yet - only for use in recovery")	\
	x(no_journal_res, "don't take a journal reservation, instead "			\
			"pin journal entry referred to by trans->journal_res.seq")	\
	x(journal_reclaim, "operation required for journal reclaim; may return error"	\
			"instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
	x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")

enum __bch_trans_commit_flags {
	/* First bits for bch_watermark: */
	__BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS,
#define x(n, ...)	__BCH_TRANS_COMMIT_##n,
	BCH_TRANS_COMMIT_FLAGS()
#undef x
};

enum bch_trans_commit_flags {
#define x(n, ...)	BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n),
	BCH_TRANS_COMMIT_FLAGS()
#undef x
};

void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags);

int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
				unsigned, unsigned);
int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);

int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
				struct bkey_i *, enum btree_iter_update_trigger_flags);

int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
			enum btree_iter_update_trigger_flags);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct
		disk_reservation *, int flags, enum
		btree_iter_update_trigger_flags iter_flags);

int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
				  struct bpos, struct bpos, unsigned, u64 *);
int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
			    struct bpos, struct bpos, unsigned, u64 *);

int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);

static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
						enum btree_id btree, struct bpos pos)
{
	return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
}

int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
				     struct bpos, struct bpos);

/*
 * For use when splitting extents in existing snapshots:
 *
 * If @old_pos is an interior snapshot node, iterate over descendent snapshot
 * nodes: for every descendent snapshot in whiche @old_pos is overwritten and
 * not visible, emit a whiteout at @new_pos.
 */
static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
						 enum btree_id btree,
						 struct bpos old_pos,
						 struct bpos new_pos)
{
	if (!btree_type_has_snapshots(btree) ||
	    bkey_eq(old_pos, new_pos))
		return 0;

	return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos);
}

int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
				       enum btree_iter_update_trigger_flags,
				       struct bkey_s_c, struct bkey_s_c);

int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
			     enum btree_id, struct bpos);

int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
				   struct bkey_i *, enum btree_iter_update_trigger_flags);

struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);

static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
{
	return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
}

static inline struct jset_entry *
bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
{
	if (!trans->journal_entries ||
	    trans->journal_entries_u64s + u64s > trans->journal_entries_size)
		return __bch2_trans_jset_entry_alloc(trans, u64s);

	struct jset_entry *e = btree_trans_journal_entries_top(trans);
	trans->journal_entries_u64s += u64s;
	return e;
}

int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);

static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
					    enum btree_id btree,
					    struct bkey_i *k)
{
	/*
	 * Most updates skip the btree write buffer until journal replay is
	 * finished because synchronization with journal replay relies on having
	 * a btree node locked - if we're overwriting a key in the journal that
	 * journal replay hasn't yet replayed, we have to mark it as
	 * overwritten.
	 *
	 * But accounting updates don't overwrite, they're deltas, and they have
	 * to be flushed to the btree strictly in order for journal replay to be
	 * able to tell which updates need to be applied:
	 */
	if (k->k.type != KEY_TYPE_accounting &&
	    unlikely(trans->journal_replay_not_finished))
		return bch2_btree_insert_clone_trans(trans, btree, k);

	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
	int ret = PTR_ERR_OR_ZERO(e);
	if (ret)
		return ret;

	journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s);
	bkey_copy(e->start, k);
	return 0;
}

void bch2_trans_commit_hook(struct btree_trans *,
			    struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *, unsigned);

__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);

/**
 * bch2_trans_commit - insert keys at given iterator positions
 *
 * This is main entry point for btree updates.
 *
 * Return values:
 * -EROFS: filesystem read only
 * -EIO: journal or btree node IO error
 */
static inline int bch2_trans_commit(struct btree_trans *trans,
				    struct disk_reservation *disk_res,
				    u64 *journal_seq,
				    unsigned flags)
{
	trans->disk_res		= disk_res;
	trans->journal_seq	= journal_seq;

	return __bch2_trans_commit(trans, flags);
}

#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)	\
	lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_flags)))

#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do)	\
	nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_flags)))

#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)		\
	bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))

#define trans_for_each_update(_trans, _i)				\
	for (struct btree_insert_entry *_i = (_trans)->updates;		\
	     (_i) < (_trans)->updates + (_trans)->nr_updates;		\
	     (_i)++)

static inline void bch2_trans_reset_updates(struct btree_trans *trans)
{
	trans_for_each_update(trans, i)
		bch2_path_put(trans, i->path, true);

	trans->nr_updates		= 0;
	trans->journal_entries_u64s	= 0;
	trans->hooks			= NULL;
	trans->extra_disk_res		= 0;
}

static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
						  unsigned type, unsigned min_bytes)
{
	unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
	struct bkey_i *mut;

	if (type && k.k->type != type)
		return ERR_PTR(-ENOENT);

	/* extra padding for varint_decode_fast... */
	mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
	if (!IS_ERR(mut)) {
		bkey_reassemble(mut, k);

		if (unlikely(bytes > bkey_bytes(k.k))) {
			memset((void *) mut + bkey_bytes(k.k), 0,
			       bytes - bkey_bytes(k.k));
			mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64));
		}
	}
	return mut;
}

static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
{
	return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
}

#define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type)		\
	bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k,	\
				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))

static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
					struct bkey_s_c *k, unsigned flags,
					unsigned type, unsigned min_bytes)
{
	struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes);
	int ret;

	if (IS_ERR(mut))
		return mut;

	ret = bch2_trans_update(trans, iter, mut, flags);
	if (ret)
		return ERR_PTR(ret);

	*k = bkey_i_to_s_c(mut);
	return mut;
}

static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
						struct bkey_s_c *k, unsigned flags)
{
	return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0);
}

#define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type)	\
	bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\
				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))

static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
					 struct btree_iter *iter,
					 unsigned btree_id, struct bpos pos,
					 unsigned flags, unsigned type, unsigned min_bytes)
{
	struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter,
				btree_id, pos, flags|BTREE_ITER_intent, type);
	struct bkey_i *ret = IS_ERR(k.k)
		? ERR_CAST(k.k)
		: __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes);
	if (IS_ERR(ret))
		bch2_trans_iter_exit(trans, iter);
	return ret;
}

static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
					       struct btree_iter *iter,
					       unsigned btree_id, struct bpos pos,
					       unsigned flags)
{
	return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0);
}

static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans,
					 struct btree_iter *iter,
					 unsigned btree_id, struct bpos pos,
					 unsigned flags, unsigned type, unsigned min_bytes)
{
	struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter,
				btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes);
	int ret;

	if (IS_ERR(mut))
		return mut;

	ret = bch2_trans_update(trans, iter, mut, flags);
	if (ret) {
		bch2_trans_iter_exit(trans, iter);
		return ERR_PTR(ret);
	}

	return mut;
}

static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans,
						       struct btree_iter *iter,
						       unsigned btree_id, struct bpos pos,
						       unsigned flags, unsigned min_bytes)
{
	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes);
}

static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
					       struct btree_iter *iter,
					       unsigned btree_id, struct bpos pos,
					       unsigned flags)
{
	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0);
}

#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
	bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter,		\
			_btree_id, _pos, _flags,			\
			KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))

static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter,
					       unsigned flags, unsigned type, unsigned val_size)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size);
	int ret;

	if (IS_ERR(k))
		return k;

	bkey_init(&k->k);
	k->k.p = iter->pos;
	k->k.type = type;
	set_bkey_val_bytes(&k->k, val_size);

	ret = bch2_trans_update(trans, iter, k, flags);
	if (unlikely(ret))
		return ERR_PTR(ret);
	return k;
}

#define bch2_bkey_alloc(_trans, _iter, _flags, _type)			\
	bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags,	\
				KEY_TYPE_##_type, sizeof(struct bch_##_type)))

#endif /* _BCACHEFS_BTREE_UPDATE_H */