/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2015 Intel Corporation All Rights Reserved
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#ifdef __KERNEL__
# include <linux/string.h>
# include <linux/slab.h>
# include <linux/bug.h>
# include <linux/kernel.h>
# include <linux/crush/crush.h>
# include <linux/crush/hash.h>
# include <linux/crush/mapper.h>
#else
# include "crush_compat.h"
# include "crush.h"
# include "hash.h"
# include "mapper.h"
#endif
#include "crush_ln_table.h"

#define dprintk(args...) …

/*
 * Implement the core CRUSH mapping algorithm.
 */

/**
 * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
 * @map: the crush_map
 * @ruleset: the storage ruleset id (user defined)
 * @type: storage ruleset type (user defined)
 * @size: output set size
 */
int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
{ … }

/*
 * bucket choose methods
 *
 * For each bucket algorithm, we have a "choose" method that, given a
 * crush input @x and replica position (usually, position in output set) @r,
 * will produce an item in the bucket.
 */

/*
 * Choose based on a random permutation of the bucket.
 *
 * We used to use some prime number arithmetic to do this, but it
 * wasn't very random, and had some other bad behaviors.  Instead, we
 * calculate an actual random permutation of the bucket members.
 * Since this is expensive, we optimize for the r=0 case, which
 * captures the vast majority of calls.
 */
static int bucket_perm_choose(const struct crush_bucket *bucket,
			      struct crush_work_bucket *work,
			      int x, int r)
{ … }

/* uniform */
static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
				 struct crush_work_bucket *work,
				 int x, int r)
{ … }

/* list */
static int bucket_list_choose(const struct crush_bucket_list *bucket,
			      int x, int r)
{ … }

/* (binary) tree */
static int height(int n)
{ … }

static int left(int x)
{ … }

static int right(int x)
{ … }

static int terminal(int x)
{ … }

static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
			      int x, int r)
{ … }

/* straw */
static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
			       int x, int r)
{ … }

/* compute 2^44*log2(input+1) */
static __u64 crush_ln(unsigned int xin)
{ … }

/*
 * straw2
 *
 * for reference, see:
 *
 * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
 *
 */

static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
				     const struct crush_choose_arg *arg,
				     int position)
{ … }

static __s32 *get_choose_arg_ids(const struct crush_bucket_straw2 *bucket,
				 const struct crush_choose_arg *arg)
{ … }

static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
				int x, int r,
				const struct crush_choose_arg *arg,
				int position)
{ … }
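/*
 * Illustrative sketch of the straw2 idea in plain user-space C, to make the
 * exponential-distribution reference above concrete.  Each item turns a
 * per-item hash into a uniform value u in (0, 1] and computes
 * draw = ln(u) / weight; the item with the largest (least negative) draw
 * wins.  Since -ln(u)/w is exponentially distributed with rate w, the winner
 * is the minimum of independent exponentials, so item i wins with
 * probability w_i / sum(w).  The names toy_hash() and straw2_pick() are
 * hypothetical and exist only for this sketch; the real bucket_straw2_choose()
 * hashes with crush_hash32_3() and uses the fixed-point crush_ln() above
 * instead of libm's log().
 */
#if 0	/* illustration only; not compiled into the library */
#include <math.h>

/* Hypothetical stand-in for crush_hash32_3(); only for this sketch. */
static double toy_hash(int x, int id, int r)
{
	unsigned int h = (unsigned int)x * 2654435761u;

	h ^= (unsigned int)id * 40503u;
	h ^= (unsigned int)r * 2246822519u;
	return ((h & 0xffff) + 1) / 65536.0;	/* uniform in (0, 1] */
}

/* Pick one of nitems weighted items for input x, replica r. */
static int straw2_pick(const double *weights, int nitems, int x, int r)
{
	double best = -1e300;	/* lower than any real draw */
	int best_i = -1;
	int i;

	for (i = 0; i < nitems; i++) {
		double draw;

		if (weights[i] <= 0.0)
			continue;	/* zero-weight items never win */
		/* log(u) <= 0, so a larger weight pulls the draw toward 0. */
		draw = log(toy_hash(x, i, r)) / weights[i];
		if (draw > best) {
			best = draw;
			best_i = i;
		}
	}
	return best_i;	/* -1 if every weight was zero */
}
#endif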
static int crush_bucket_choose(const struct crush_bucket *in,
			       struct crush_work_bucket *work,
			       int x, int r,
			       const struct crush_choose_arg *arg,
			       int position)
{ … }

/*
 * true if device is marked "out" (failed, fully offloaded)
 * of the cluster
 */
static int is_out(const struct crush_map *map,
		  const __u32 *weight, int weight_max,
		  int item, int x)
{ … }

/**
 * crush_choose_firstn - choose numrep distinct items of given type
 * @map: the crush_map
 * @work: working space initialized by crush_init_workspace()
 * @bucket: the bucket we are choosing an item from
 * @weight: weight vector (for map leaves)
 * @weight_max: size of weight vector
 * @x: crush input value
 * @numrep: the number of items to choose
 * @type: the type of item to choose
 * @out: pointer to output vector
 * @outpos: our position in that vector
 * @out_size: size of the out vector
 * @tries: number of attempts to make
 * @recurse_tries: number of attempts to have recursive chooseleaf make
 * @local_retries: localized retries
 * @local_fallback_retries: localized fallback retries
 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
 * @stable: stable mode starts rep=0 in the recursive call for all replicas
 * @vary_r: pass r to recursive calls
 * @out2: second output vector for leaf items (if @recurse_to_leaf)
 * @parent_r: r value passed from the parent
 * @choose_args: weights and ids for each known bucket
 */
static int crush_choose_firstn(const struct crush_map *map,
			       struct crush_work *work,
			       const struct crush_bucket *bucket,
			       const __u32 *weight, int weight_max,
			       int x, int numrep, int type,
			       int *out, int outpos,
			       int out_size,
			       unsigned int tries,
			       unsigned int recurse_tries,
			       unsigned int local_retries,
			       unsigned int local_fallback_retries,
			       int recurse_to_leaf,
			       unsigned int vary_r,
			       unsigned int stable,
			       int *out2,
			       int parent_r,
			       const struct crush_choose_arg *choose_args)
{ … }

/*
 * crush_choose_indep: alternative breadth-first positionally stable mapping
 */
static void crush_choose_indep(const struct crush_map *map,
			       struct crush_work *work,
			       const struct crush_bucket *bucket,
			       const __u32 *weight, int weight_max,
			       int x, int left, int numrep, int type,
			       int *out, int outpos,
			       unsigned int tries,
			       unsigned int recurse_tries,
			       int recurse_to_leaf,
			       int *out2,
			       int parent_r,
			       const struct crush_choose_arg *choose_args)
{ … }

/*
 * This takes a chunk of memory and sets it up to be a shiny new
 * working area for a CRUSH placement computation.  It must be called
 * on any newly allocated memory before passing it in to
 * crush_do_rule.  It may be used repeatedly after that, so long as the
 * map has not changed.  If the map /has/ changed, you must make sure
 * the working size is no smaller than what was allocated and re-run
 * crush_init_workspace.
 *
 * If you do retain the working space between calls to crush, make it
 * thread-local.
 */
void crush_init_workspace(const struct crush_map *map, void *v)
{ … }

/**
 * crush_do_rule - calculate a mapping with the given input and rule
 * @map: the crush_map
 * @ruleno: the rule id
 * @x: hash input
 * @result: pointer to result vector
 * @result_max: maximum result size
 * @weight: weight vector (for map leaves)
 * @weight_max: size of weight vector
 * @cwin: pointer to at least crush_work_size() bytes of memory
 * @choose_args: weights and ids for each known bucket
 */
int crush_do_rule(const struct crush_map *map,
		  int ruleno, int x, int *result, int result_max,
		  const __u32 *weight, int weight_max,
		  void *cwin, const struct crush_choose_arg *choose_args)
{ … }
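/*
 * Usage sketch (illustration only): how a user-space caller might drive the
 * entry points above.  It assumes crush_work_size(map, result_max), the
 * sizing helper referenced in the crush_do_rule() comment; example_map_pg()
 * and its minimal error handling are hypothetical and exist only for this
 * sketch.
 */
#if 0	/* illustration only; user-space, not compiled into the library */
#include <stdlib.h>
#include <errno.h>

static int example_map_pg(const struct crush_map *map, int ruleno, int x,
			  const __u32 *weight, int weight_max,
			  int *result, int result_max)
{
	void *cwin;
	int numrep;

	/* Scratch space sized for this map and result_max... */
	cwin = malloc(crush_work_size(map, result_max));
	if (!cwin)
		return -ENOMEM;
	/* ...and initialized before the first crush_do_rule() call. */
	crush_init_workspace(map, cwin);

	/* NULL choose_args: no per-bucket weight/id overrides. */
	numrep = crush_do_rule(map, ruleno, x, result, result_max,
			       weight, weight_max, cwin, NULL);

	/*
	 * The workspace could instead be kept (thread-local) and reused
	 * for further calls, as long as the map does not change.
	 */
	free(cwin);
	return numrep;	/* number of items written to result[] */
}
#endif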