dlmmaster.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * dlmmod.c
 *
 * standalone DLM module
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */


#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/spinlock.h>
#include <linux/delay.h>


#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
#include "../cluster/tcp.h"

#include "dlmapi.h"
#include "dlmcommon.h"
#include "dlmdomain.h"
#include "dlmdebug.h"

#define MLOG_MASK_PREFIX …
#include "../cluster/masklog.h"

static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node,
			      int idx);
static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node,
			    int idx);

static void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
static int dlm_do_assert_master(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res,
				void *nodemap, u32 flags);
static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);

static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
				struct dlm_master_list_entry *mle,
				const char *name,
				unsigned int namelen)
{ … }

static struct kmem_cache *dlm_lockres_cache;
static struct kmem_cache *dlm_lockname_cache;
static struct kmem_cache *dlm_mle_cache;

static void dlm_mle_release(struct kref *kref);
static void dlm_init_mle(struct dlm_master_list_entry *mle,
			enum dlm_mle_type type,
			struct dlm_ctxt *dlm,
			struct dlm_lock_resource *res,
			const char *name,
			unsigned int namelen);
static void dlm_put_mle(struct dlm_master_list_entry *mle);
static void __dlm_put_mle(struct dlm_master_list_entry *mle);
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen);

static int dlm_do_master_request(struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle, int to);


static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_master_list_entry *mle,
				     int *blocked);
static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res,
				    struct dlm_master_list_entry *mle,
				    int blocked);
static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
				 struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle,
				 struct dlm_master_list_entry **oldmle,
				 const char *name, unsigned int namelen,
				 u8 new_master, u8 master);

static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res);
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res);
static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res,
				       u8 target);
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res);


int dlm_is_host_down(int errno)
{ … }


/*
 * MASTER LIST FUNCTIONS
 */


/*
 * regarding master list entries and heartbeat callbacks:
 *
 * in order to avoid sleeping and allocation that occurs in
 * heartbeat, master list entries are simply attached to the
 * dlm's established heartbeat callbacks.  the mle is attached
 * when it is created, and since the dlm->spinlock is held at
 * that time, any heartbeat event will be properly discovered
 * by the mle.  the mle needs to be detached from the
 * dlm->mle_hb_events list as soon as heartbeat events are no
 * longer useful to the mle, and before the mle is freed.
 *
 * as a general rule, heartbeat events are no longer needed by
 * the mle once an "answer" regarding the lock master has been
 * received.
 */
static inline void __dlm_mle_attach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{ … }


static inline void __dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					      struct dlm_master_list_entry *mle)
{ … }


static inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
					    struct dlm_master_list_entry *mle)
{ … }

static void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
{ … }

static void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
{ … }

/* remove from list and free */
static void __dlm_put_mle(struct dlm_master_list_entry *mle)
{ … }


/* must not have any spinlocks coming in */
static void dlm_put_mle(struct dlm_master_list_entry *mle)
{ … }

static inline void dlm_get_mle(struct dlm_master_list_entry *mle)
{ … }

static void dlm_init_mle(struct dlm_master_list_entry *mle,
			enum dlm_mle_type type,
			struct dlm_ctxt *dlm,
			struct dlm_lock_resource *res,
			const char *name,
			unsigned int namelen)
{ … }

void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{ … }

void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
{ … }

/* returns 1 if found, 0 if not */
static int dlm_find_mle(struct dlm_ctxt *dlm,
			struct dlm_master_list_entry **mle,
			char *name, unsigned int namelen)
{ … }

void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
{ … }

static void dlm_mle_node_down(struct dlm_ctxt *dlm,
			      struct dlm_master_list_entry *mle,
			      struct o2nm_node *node, int idx)
{ … }

static void dlm_mle_node_up(struct dlm_ctxt *dlm,
			    struct dlm_master_list_entry *mle,
			    struct o2nm_node *node, int idx)
{ … }


int dlm_init_mle_cache(void)
{ … }

void dlm_destroy_mle_cache(void)
{ … }

static void dlm_mle_release(struct kref *kref)
{ … }


/*
 * LOCK RESOURCE FUNCTIONS
 */

int dlm_init_master_caches(void)
{ … }

void dlm_destroy_master_caches(void)
{ … }

static void dlm_lockres_release(struct kref *kref)
{ … }

void dlm_lockres_put(struct dlm_lock_resource *res)
{ … }

static void dlm_init_lockres(struct dlm_ctxt *dlm,
			     struct dlm_lock_resource *res,
			     const char *name, unsigned int namelen)
{ … }

struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
				   const char *name,
				   unsigned int namelen)
{ … }

void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res, int bit)
{ … }

void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res, int bit)
{ … }

static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res)
{ … }

void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res)
{ … }

void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
				   struct dlm_lock_resource *res)
{ … }

void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
		struct dlm_lock_resource *res)
{ … }

static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
		struct dlm_lock_resource *res)
{ … }

static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
		struct dlm_lock_resource *res)
{ … }

/*
 * lookup a lock resource by name.
 * may already exist in the hashtable.
 * lockid is null terminated
 *
 * if not, allocate enough for the lockres and for
 * the temporary structure used in doing the mastering.
 *
 * also, do a lookup in the dlm->master_list to see
 * if another node has begun mastering the same lock.
 * if so, there should be a block entry in there
 * for this name, and we should *not* attempt to master
 * the lock here.   need to wait around for that node
 * to assert_master (or die).
 *
 */
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
					  const char *lockid,
					  int namelen,
					  int flags)
{ … }


#define DLM_MASTERY_TIMEOUT_MS …

static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     struct dlm_master_list_entry *mle,
				     int *blocked)
{ … }

struct dlm_bitmap_diff_iter
{ … };

enum dlm_node_state_change
{ … };

static void dlm_bitmap_diff_iter_init(struct dlm_bitmap_diff_iter *iter,
				      unsigned long *orig_bm,
				      unsigned long *cur_bm)
{ … }

static int dlm_bitmap_diff_iter_next(struct dlm_bitmap_diff_iter *iter,
				     enum dlm_node_state_change *state)
{ … }


static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res,
				    struct dlm_master_list_entry *mle,
				    int blocked)
{ … }


/*
 * DLM_MASTER_REQUEST_MSG
 *
 * returns: 0 on success,
 *          -errno on a network error
 *
 * on error, the caller should assume the target node is "dead"
 *
 */

static int dlm_do_master_request(struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle, int to)
{ … }

/*
 * locks that can be taken here:
 * dlm->spinlock
 * res->spinlock
 * mle->spinlock
 * dlm->master_list
 *
 * if possible, TRIM THIS DOWN!!!
 */
int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
			       void **ret_data)
{ … }

/*
 * DLM_ASSERT_MASTER_MSG
 */


/*
 * NOTE: this can be used for debugging
 * can periodically run all locks owned by this node
 * and re-assert across the cluster...
 */
static int dlm_do_assert_master(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res,
				void *nodemap, u32 flags)
{ … }

/*
 * locks that can be taken here:
 * dlm->spinlock
 * res->spinlock
 * mle->spinlock
 * dlm->master_list
 *
 * if possible, TRIM THIS DOWN!!!
 */
int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data)
{ … }

void dlm_assert_master_post_handler(int status, void *data, void *ret_data)
{ … }

int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
			       struct dlm_lock_resource *res,
			       int ignore_higher, u8 request_from, u32 flags)
{ … }

static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
{ … }

/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
 * We cannot wait for node recovery to complete to begin mastering this
 * lockres because this lockres is used to kick off recovery! ;-)
 * So, do a pre-check on all living nodes to see if any of those nodes
 * think that $RECOVERY is currently mastered by a dead node.  If so,
 * we wait a short time to allow that node to get notified by its own
 * heartbeat stack, then check again.  All $RECOVERY lock resources
 * mastered by dead nodes are purged when the heartbeat callback is
 * fired, so we can know for sure that it is safe to continue once
 * the node returns a live node or no node.  */
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res)
{ … }

/*
 * DLM_DEREF_LOCKRES_MSG
 */

int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{ … }

int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data)
{ … }

int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
			      void **ret_data)
{ … }

static void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm,
		struct dlm_lock_resource *res, u8 node)
{ … }

static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
{ … }

/*
 * A migratable resource is one that is :
 * 1. locally mastered, and,
 * 2. zero local locks, and,
 * 3. one or more non-local locks, or, one or more references
 * Returns 1 if yes, 0 if not.
 */
static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res)
{ … }

/*
 * DLM_MIGRATE_LOCKRES
 */


static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
			       struct dlm_lock_resource *res, u8 target)
{ … }

/*
 * Should be called only after beginning the domain leave process.
 * There should not be any remaining locks on nonlocal lock resources,
 * and there should be no local locks left on locally mastered resources.
 *
 * Called with the dlm spinlock held, may drop it to do migration, but
 * will re-acquire before exit.
 *
 * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
 */
int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
	__must_hold(&dlm->spinlock)
{ … }

int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{ … }

static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
				     struct dlm_lock_resource *res,
				     u8 mig_target)
{ … }

static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm,
				struct dlm_lock_resource *res)
{ … }


static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
				       struct dlm_lock_resource *res,
				       u8 target)
{ … }

/* last step in the migration process.
 * original master calls this to free all of the dlm_lock
 * structures that used to be for other nodes. */
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
				      struct dlm_lock_resource *res)
{ … }

/*
 * Pick a node to migrate the lock resource to. This function selects a
 * potential target based first on the locks and then on refmap. It skips
 * nodes that are in the process of exiting the domain.
 */
static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res)
{ … }

/* this is called by the new master once all lockres
 * data has been received */
static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res,
				  u8 master, u8 new_master,
				  struct dlm_node_iter *iter)
{ … }


/* if there is an existing mle for this lockres, we now know who the master is.
 * (the one who sent us *this* message) we can clear it up right away.
 * since the process that put the mle on the list still has a reference to it,
 * we can unhash it now, set the master and wake the process.  as a result,
 * we will have no mle in the list to start with.  now we can add an mle for
 * the migration and this should be the only one found for those scanning the
 * list.  */
int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
				void **ret_data)
{ … }

/* must be holding dlm->spinlock and dlm->master_lock
 * when adding a migration mle, we can clear any other mles
 * in the master list because we know with certainty that
 * the master is "master".  so we remove any old mle from
 * the list after setting it's master field, and then add
 * the new migration mle.  this way we can hold with the rule
 * of having only one mle for a given lock name at all times. */
static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
				 struct dlm_lock_resource *res,
				 struct dlm_master_list_entry *mle,
				 struct dlm_master_list_entry **oldmle,
				 const char *name, unsigned int namelen,
				 u8 new_master, u8 master)
{ … }

/*
 * Sets the owner of the lockres, associated to the mle, to UNKNOWN
 */
static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm,
					struct dlm_master_list_entry *mle)
{ … }

static void dlm_clean_migration_mle(struct dlm_ctxt *dlm,
				    struct dlm_master_list_entry *mle)
{ … }

static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
				struct dlm_master_list_entry *mle, u8 dead_node)
{ … }

void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
{ … }

int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
			 u8 old_master)
{ … }

/*
 * LOCKRES AST REFCOUNT
 * this is integral to migration
 */

/* for future intent to call an ast, reserve one ahead of time.
 * this should be called only after waiting on the lockres
 * with dlm_wait_on_lockres, and while still holding the
 * spinlock after the call. */
void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res)
{ … }

/*
 * used to drop the reserved ast, either because it went unused,
 * or because the ast/bast was actually called.
 *
 * also, if there is a pending migration on this lockres,
 * and this was the last pending ast on the lockres,
 * atomically set the MIGRATING flag before we drop the lock.
 * this is how we ensure that migration can proceed with no
 * asts in progress.  note that it is ok if the state of the
 * queues is such that a lock should be granted in the future
 * or that a bast should be fired, because the new master will
 * shuffle the lists on this lockres as soon as it is migrated.
 */
void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
			     struct dlm_lock_resource *res)
{ … }

void dlm_force_free_mles(struct dlm_ctxt *dlm)
{ … }
linux/fs/ocfs2/dlm/dlmmaster.c