// SPDX-License-Identifier: GPL-2.0
// linux/net/ceph/osdmap.c

#include <linux/ceph/ceph_debug.h>

#include <linux/module.h>
#include <linux/slab.h>

#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/decode.h>
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>

static __printf(2, 3)
void osdmap_info(const struct ceph_osdmap *map, const char *fmt, ...)
{}

char *ceph_osdmap_state_str(char *str, int len, u32 state)
{}

/* maps */

static int calc_bits_of(unsigned int t)
{}

/*
 * the foo_mask is the smallest value 2^n-1 that is >= foo.
 */
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{}
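
/*
 * Illustrative sketch (not the in-kernel code): computing the smallest
 * value 2^n - 1 that is >= @t, e.g. 15 for any t in 9..15.
 */
static u32 example_mask_for(u32 t)
{
	u32 mask = 0;

	while (mask < t)
		mask = (mask << 1) | 1;
	return mask;
}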

/*
 * decode crush map
 */
static int crush_decode_uniform_bucket(void **p, void *end,
				       struct crush_bucket_uniform *b)
{}

static int crush_decode_list_bucket(void **p, void *end,
				    struct crush_bucket_list *b)
{}

static int crush_decode_tree_bucket(void **p, void *end,
				    struct crush_bucket_tree *b)
{}

static int crush_decode_straw_bucket(void **p, void *end,
				     struct crush_bucket_straw *b)
{}

static int crush_decode_straw2_bucket(void **p, void *end,
				      struct crush_bucket_straw2 *b)
{}

struct crush_name_node {};

static struct crush_name_node *alloc_crush_name(size_t name_len)
{}

static void free_crush_name(struct crush_name_node *cn)
{}

DEFINE_RB_FUNCS(crush_name, struct crush_name_node, cn_id, cn_node)

static int decode_crush_names(void **p, void *end, struct rb_root *root)
{}

void clear_crush_names(struct rb_root *root)
{}

static struct crush_choose_arg_map *alloc_choose_arg_map(void)
{}

static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
{}

DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
		node);

void clear_choose_args(struct crush_map *c)
{}

static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
{}

/*
 * Assumes @arg is zero-initialized.
 */
static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
{}

static int decode_choose_args(void **p, void *end, struct crush_map *c)
{}

static void crush_finalize(struct crush_map *c)
{}

static struct crush_map *crush_decode(void *pbyval, void *end)
{}

int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
{}

int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
{}

static struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
{}

static void free_pg_mapping(struct ceph_pg_mapping *pg)
{}

/*
 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
 * to a set of osds) and primary_temp (explicit primary setting)
 */
DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
		 RB_BYPTR, const struct ceph_pg *, node)
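
/*
 * DEFINE_RB_FUNCS2() (from linux/ceph/libceph.h) generates
 * insert_pg_mapping(), lookup_pg_mapping() and erase_pg_mapping(),
 * keyed by ceph_pg_compare().  Typical lookup (sketch only):
 *
 *	struct ceph_pg_mapping *pg =
 *		lookup_pg_mapping(&map->pg_temp, &pgid);
 *	if (pg)
 *		... use the pg_temp osd set ...
 */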

/*
 * rbtree of pg pool info
 */
DEFINE_RB_FUNCS(pg_pool, struct ceph_pg_pool_info, id, node)

struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{}

const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);

int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);

u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
{}
EXPORT_SYMBOL(ceph_pg_pool_flags);

static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{}

static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
{}

static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
{}

/*
 * CRUSH workspaces
 *
 * workspace_manager framework borrowed from fs/btrfs/compression.c.
 * Two simplifications: there is only one type of workspace and there
 * is always at least one workspace.
 */
static struct crush_work *alloc_workspace(const struct crush_map *c)
{}

static void free_workspace(struct crush_work *work)
{}

static void init_workspace_manager(struct workspace_manager *wsm)
{}

static void add_initial_workspace(struct workspace_manager *wsm,
				  struct crush_work *work)
{}

static void cleanup_workspace_manager(struct workspace_manager *wsm)
{}

/*
 * Finds an available workspace or allocates a new one.  If it's not
 * possible to allocate a new one, waits until there is one.
 */
static struct crush_work *get_workspace(struct workspace_manager *wsm,
					const struct crush_map *c)
{}

/*
 * Puts a workspace back on the list or frees it if we have enough
 * idle ones sitting around.
 */
static void put_workspace(struct workspace_manager *wsm,
			  struct crush_work *work)
{}
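
/*
 * Typical borrow/return pattern for the workspace manager (sketch
 * only; error handling and the actual CRUSH call are omitted):
 */
static void example_with_workspace(struct workspace_manager *wsm,
				   const struct crush_map *c)
{
	struct crush_work *work;

	work = get_workspace(wsm, c);	/* may block until one is free */
	/* ... run crush_do_rule() with @work as scratch space ... */
	put_workspace(wsm, work);	/* recycle or free the workspace */
}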

/*
 * osd map
 */
struct ceph_osdmap *ceph_osdmap_alloc(void)
{}

void ceph_osdmap_destroy(struct ceph_osdmap *map)
{}

/*
 * Adjust max_osd value, (re)allocate arrays.
 *
 * The new elements are properly initialized.
 */
static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
{}
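
/*
 * Sketch of the grow-and-zero pattern for one per-osd array
 * (illustrative; the real function resizes several arrays and keeps
 * them consistent):
 */
static int example_grow_u32_array(u32 **arr, u32 old_max, u32 new_max)
{
	u32 *tmp;

	tmp = krealloc(*arr, new_max * sizeof(**arr), GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	/* zero only the new tail so existing entries are preserved */
	memset(tmp + old_max, 0, (new_max - old_max) * sizeof(**arr));
	*arr = tmp;
	return 0;
}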

static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
{}

#define OSDMAP_WRAPPER_COMPAT_VER	7
#define OSDMAP_CLIENT_DATA_COMPAT_VER	1

/*
 * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
 * to struct_v of the client_data section for new (v7 and above)
 * osdmaps.
 */
static int get_osdmap_client_data_v(void **p, void *end,
				    const char *prefix, u8 *v)
{}

static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
			  bool incremental)
{}

static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
{}

typedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **p, void *end,
						       bool incremental);

static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
			     decode_mapping_fn_t fn, bool incremental)
{}

static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
						bool incremental)
{}

static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
{}

static struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
						     bool incremental)
{}

static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_new_primary_temp(void **p, void *end,
				   struct ceph_osdmap *map)
{}

u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
{}

static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
{}

static int decode_primary_affinity(void **p, void *end,
				   struct ceph_osdmap *map)
{}

static int decode_new_primary_affinity(void **p, void *end,
				       struct ceph_osdmap *map)
{}

static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
						 bool __unused)
{}

static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
{}

static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
						       bool __unused)
{}

static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
{}

static int decode_new_pg_upmap_items(void **p, void *end,
				     struct ceph_osdmap *map)
{}

static int decode_old_pg_upmap_items(void **p, void *end,
				     struct ceph_osdmap *map)
{}

/*
 * decode a full map.
 */
static int osdmap_decode(void **p, void *end, bool msgr2,
			 struct ceph_osdmap *map)
{}

/*
 * Allocate and decode a full map.
 */
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{}

/*
 * Encoding order is (new_up_client, new_state, new_weight).  Need to
 * apply in the (new_weight, new_state, new_up_client) order, because
 * an incremental map may look like e.g.
 *
 *     new_up_client: { osd=6, addr=... } # set osd_state and addr
 *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
 */
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
				      bool msgr2, struct ceph_osdmap *map)
{}
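
/*
 * Simplified sketch of the xorstate step (see the comment above for
 * why the apply order matters).  CEPH_OSD_UP and CEPH_OSD_EXISTS are
 * the state bits from linux/ceph/rados.h.
 */
static u32 example_apply_xorstate(u32 state, u32 xorstate)
{
	if (!xorstate)
		xorstate = CEPH_OSD_UP;	/* legacy encoding: 0 toggles UP */
	if ((xorstate & CEPH_OSD_EXISTS) && (state & CEPH_OSD_EXISTS))
		return 0;		/* osd destroyed - forget its state */
	return state ^ xorstate;
}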

/*
 * decode and apply an incremental map update.
 */
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
					     struct ceph_osdmap *map)
{}

void ceph_oloc_copy(struct ceph_object_locator *dest,
		    const struct ceph_object_locator *src)
{}
EXPORT_SYMBOL(ceph_oloc_copy);

void ceph_oloc_destroy(struct ceph_object_locator *oloc)
{}
EXPORT_SYMBOL(ceph_oloc_destroy);

void ceph_oid_copy(struct ceph_object_id *dest,
		   const struct ceph_object_id *src)
{}
EXPORT_SYMBOL(ceph_oid_copy);

static __printf(2, 0)
int oid_printf_vargs(struct ceph_object_id *oid, const char *fmt, va_list ap)
{}

/*
 * If oid doesn't fit into inline buffer, BUG.
 */
void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...)
{}
EXPORT_SYMBOL(ceph_oid_printf);
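
/*
 * Sketch of the "must fit" variant (illustrative only): format into
 * the inline buffer and BUG if the result would have been truncated.
 */
static void example_oid_printf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsnprintf(buf, buflen, fmt, ap);
	va_end(ap);
	BUG_ON((size_t)len >= buflen);	/* name must fit inline */
}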

static __printf(3, 0)
int oid_aprintf_vargs(struct ceph_object_id *oid, gfp_t gfp,
		      const char *fmt, va_list ap)
{}

/*
 * If oid doesn't fit into inline buffer, allocate.
 */
int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
		     const char *fmt, ...)
{}
EXPORT_SYMBOL(ceph_oid_aprintf);

void ceph_oid_destroy(struct ceph_object_id *oid)
{}
EXPORT_SYMBOL(ceph_oid_destroy);

/*
 * osds only
 */
static bool __osds_equal(const struct ceph_osds *lhs,
			 const struct ceph_osds *rhs)
{}

/*
 * osds + primary
 */
static bool osds_equal(const struct ceph_osds *lhs,
		       const struct ceph_osds *rhs)
{}

static bool osds_valid(const struct ceph_osds *set)
{}

void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
{}

bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
		      u32 new_pg_num)
{}

bool ceph_is_new_interval(const struct ceph_osds *old_acting,
			  const struct ceph_osds *new_acting,
			  const struct ceph_osds *old_up,
			  const struct ceph_osds *new_up,
			  int old_size,
			  int new_size,
			  int old_min_size,
			  int new_min_size,
			  u32 old_pg_num,
			  u32 new_pg_num,
			  bool old_sort_bitwise,
			  bool new_sort_bitwise,
			  bool old_recovery_deletes,
			  bool new_recovery_deletes,
			  const struct ceph_pg *pgid)
{}

static int calc_pg_rank(int osd, const struct ceph_osds *acting)
{}

static bool primary_changed(const struct ceph_osds *old_acting,
			    const struct ceph_osds *new_acting)
{}

bool ceph_osds_changed(const struct ceph_osds *old_acting,
		       const struct ceph_osds *new_acting,
		       bool any_change)
{}

/*
 * Map an object into a PG.
 *
 * Should only be called with target_oid and target_oloc (as opposed to
 * base_oid and base_oloc), since tiering isn't taken into account.
 */
void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
				 const struct ceph_object_id *oid,
				 const struct ceph_object_locator *oloc,
				 struct ceph_pg *raw_pgid)
{}
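
/*
 * Minimal sketch, assuming no namespace or locator key is set: the
 * pool comes straight from the locator and the seed is a hash of the
 * object name (ceph_str_hash() is the real helper, selected by
 * pi->object_hash).
 */
static void example_oid_to_raw_pg(const struct ceph_pg_pool_info *pi,
				  const struct ceph_object_id *oid,
				  s64 pool, struct ceph_pg *raw_pgid)
{
	raw_pgid->pool = pool;
	raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
				       oid->name_len);
}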

int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
			      const struct ceph_object_id *oid,
			      const struct ceph_object_locator *oloc,
			      struct ceph_pg *raw_pgid)
{}
EXPORT_SYMBOL(ceph_object_locator_to_pg);

/*
 * Map a raw PG (full precision ps) into an actual PG.
 */
static void raw_pg_to_pg(struct ceph_pg_pool_info *pi,
			 const struct ceph_pg *raw_pgid,
			 struct ceph_pg *pgid)
{}
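
/*
 * Sketch of the fold, mirroring ceph_stable_mod() from
 * linux/ceph/osdmap.h: seeds below pg_num keep their low bits, the
 * rest are folded into the lower half, so growing pg_num remaps as
 * few PGs as possible.
 */
static u32 example_fold_seed(u32 seed, u32 pg_num, u32 pg_num_mask)
{
	if ((seed & pg_num_mask) < pg_num)
		return seed & pg_num_mask;
	return seed & (pg_num_mask >> 1);
}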

/*
 * Map a raw PG (full precision ps) into a placement ps (placement
 * seed).  Include pool id in that value so that different pools don't
 * use the same seeds.
 */
static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
			 const struct ceph_pg *raw_pgid)
{}
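
/*
 * Sketch for pools with the HASHPSPOOL flag (the default on modern
 * pools): fold the seed to pgp_num, then mix in the pool id with a
 * Jenkins hash so different pools don't share placements.
 */
static u32 example_pps(const struct ceph_pg_pool_info *pi,
		       const struct ceph_pg *raw_pgid)
{
	return crush_hash32_2(CRUSH_HASH_RJENKINS1,
			      example_fold_seed(raw_pgid->seed, pi->pgp_num,
						pi->pgp_num_mask),
			      raw_pgid->pool);
}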

/*
 * Magic value used for a "default" fallback choose_args, used if the
 * crush_choose_arg_map passed to do_crush() does not exist.  If this
 * also doesn't exist, fall back to canonical weights.
 */
#define CEPH_DEFAULT_CHOOSE_ARGS	-1

static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
		    int *result, int result_max,
		    const __u32 *weight, int weight_max,
		    s64 choose_args_index)
{}

static void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
				    struct ceph_pg_pool_info *pi,
				    struct ceph_osds *set)
{}

/*
 * Calculate raw set (CRUSH output) for given PG and filter out
 * nonexistent OSDs.  ->primary is undefined for a raw set.
 *
 * Placement seed (CRUSH input) is returned through @ppps.
 */
static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
			   struct ceph_pg_pool_info *pi,
			   const struct ceph_pg *raw_pgid,
			   struct ceph_osds *raw,
			   u32 *ppps)
{}

/* apply pg_upmap[_items] mappings */
static void apply_upmap(struct ceph_osdmap *osdmap,
			const struct ceph_pg *pgid,
			struct ceph_osds *raw)
{}
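
/*
 * Simplified sketch of the pg_upmap_items pass, assuming a flat
 * (from, to) pairs array for illustration: each pair replaces one
 * occurrence of @from in the raw set.  The real code performs extra
 * validity checks (e.g. skipping nonexistent osds).
 */
static void example_apply_upmap_items(int *osds, int len,
				      const int *pairs, int npairs)
{
	int i, j;

	for (i = 0; i < npairs; i++) {
		int from = pairs[2 * i];
		int to = pairs[2 * i + 1];

		for (j = 0; j < len; j++) {
			if (osds[j] == from) {
				osds[j] = to;
				break;
			}
		}
	}
}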

/*
 * Given raw set, calculate up set and up primary.  By definition of an
 * up set, the result won't contain nonexistent or down OSDs.
 *
 * This is done in-place - on return @set is the up set.  If it's
 * empty, ->primary will remain undefined.
 */
static void raw_to_up_osds(struct ceph_osdmap *osdmap,
			   struct ceph_pg_pool_info *pi,
			   struct ceph_osds *set)
{}

static void apply_primary_affinity(struct ceph_osdmap *osdmap,
				   struct ceph_pg_pool_info *pi,
				   u32 pps,
				   struct ceph_osds *up)
{}

/*
 * Get pg_temp and primary_temp mappings for given PG.
 *
 * Note that a PG may have none, only pg_temp, only primary_temp or
 * both pg_temp and primary_temp mappings.  This means @temp isn't
 * always a valid OSD set on return: in the "only primary_temp" case,
 * @temp will have its ->primary >= 0 but ->size == 0.
 */
static void get_temp_osds(struct ceph_osdmap *osdmap,
			  struct ceph_pg_pool_info *pi,
			  const struct ceph_pg *pgid,
			  struct ceph_osds *temp)
{}

/*
 * Map a PG to its acting set as well as its up set.
 *
 * Acting set is used for data mapping purposes, while up set can be
 * recorded for detecting interval changes and deciding whether to
 * resend a request.
 */
void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
			       struct ceph_pg_pool_info *pi,
			       const struct ceph_pg *raw_pgid,
			       struct ceph_osds *up,
			       struct ceph_osds *acting)
{}
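
/*
 * For reference, a sketch of the usual mapping pipeline (names are the
 * helpers above; exact details live in their implementations):
 *
 *	pg_to_raw_osds()         raw CRUSH output for the PG
 *	apply_upmap()            pg_upmap / pg_upmap_items overrides
 *	raw_to_up_osds()         drop down/nonexistent osds, pick primary
 *	apply_primary_affinity() possibly reshuffle the primary
 *	get_temp_osds()          pg_temp / primary_temp overrides
 *
 * The acting set is the temp set when one exists, otherwise the up set.
 */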

bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
			      struct ceph_pg_pool_info *pi,
			      const struct ceph_pg *raw_pgid,
			      struct ceph_spg *spgid)
{}

/*
 * Return acting primary for given PG, or -1 if none.
 */
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
			      const struct ceph_pg *raw_pgid)
{}
EXPORT_SYMBOL(ceph_pg_to_acting_primary);

static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
					      size_t name_len)
{}

static void free_crush_loc(struct crush_loc_node *loc)
{}

static int crush_loc_compare(const struct crush_loc *loc1,
			     const struct crush_loc *loc2)
{}

DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
		 RB_BYPTR, const struct crush_loc *, cl_node)

/*
 * Parses a set of <bucket type name>':'<bucket name> pairs separated
 * by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
 *
 * Note that @crush_location is modified by strsep().
 */
int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
{}
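
/*
 * Sketch of the tokenization, relying on strsep() semantics (the real
 * parser also validates the names and inserts them into the rbtree):
 */
static void example_tokenize_location(char *s)
{
	char *pair, *type_name;

	while ((pair = strsep(&s, "|")) != NULL) {
		type_name = strsep(&pair, ":");
		/* type_name: bucket type (e.g. "rack"); pair: bucket name */
	}
}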

int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
{}

void ceph_clear_crush_locs(struct rb_root *locs)
{}

/*
 * [a-zA-Z0-9-_.]+
 */
static bool is_valid_crush_name(const char *name)
{}

/*
 * Gets the parent of an item.  Returns its id (<0 because the
 * parent is always a bucket), type id (>0 for the same reason,
 * via @parent_type_id) and location (via @parent_loc).  If no
 * parent, returns 0.
 *
 * Does a linear search, as there are no parent pointers of any
 * kind.  Note that the result is ambiguous for items that occur
 * multiple times in the map.
 */
static int get_immediate_parent(struct crush_map *c, int id,
				u16 *parent_type_id,
				struct crush_loc *parent_loc)
{}

/*
 * Calculates the locality/distance from an item to a client
 * location expressed in terms of CRUSH hierarchy as a set of
 * (bucket type name, bucket name) pairs.  Specifically, looks
 * for the lowest-valued bucket type for which the location of
 * @id matches one of the locations in @locs, so for standard
 * bucket types (host = 1, rack = 3, datacenter = 8, zone = 9)
 * a matching host is closer than a matching rack and a matching
 * data center is closer than a matching zone.
 *
 * Specifying multiple locations (a "multipath" location) such
 * as "rack=foo1 rack=foo2 datacenter=bar" is allowed -- @locs
 * is a multimap.  The locality will be:
 *
 * - 3 for OSDs in racks foo1 and foo2
 * - 8 for OSDs in data center bar
 * - -1 for all other OSDs
 *
 * The lowest possible bucket type is 1, so the best locality
 * for an OSD is 1 (i.e. a matching host).  Locality 0 would be
 * the OSD itself.
 */
int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
			    struct rb_root *locs)
{}