linux/mm/memory-tiers.c

// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/lockdep.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/memory.h>
#include <linux/memory-tiers.h>
#include <linux/notifier.h>
#include <linux/sched/sysctl.h>

#include "internal.h"

/*
 * File-local tier object; to_memory_tier() below yields one from a
 * struct device pointer.
 * NOTE(review): no members are visible here -- confirm the layout before
 * relying on it.
 */
struct memory_tier {};

/*
 * Element type of the node_demotion table declared further down; the
 * examples there refer to a per-node "preferred" target set.
 */
struct demotion_nodes {};

/* Element type of node_memory_types[], which has one entry per node id. */
struct node_memory_type_map {};

/*
 * NOTE(review): presumably serializes updates to the memory_tiers list and
 * to node_memory_types[] -- confirm against the users of this mutex.
 */
static DEFINE_MUTEX(memory_tier_lock);
/* NOTE(review): presumably links every live struct memory_tier -- confirm. */
static LIST_HEAD(memory_tiers);
/*
 * The list is used to store all memory types that are not created
 * by a device driver.
 */
static LIST_HEAD(default_memory_types);
/* One map entry per possible node id (the array is sized by MAX_NUMNODES). */
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
/* Not static: this pointer is visible outside this file. */
struct memory_dev_type *default_dram_type;
/* Marked __initdata, so it must only be used during init. */
nodemask_t default_dram_nodes __initdata =;

/* Bus type object for the memory tier subsystem; its initializer is not shown. */
static const struct bus_type memory_tier_subsys =;

#ifdef CONFIG_NUMA_BALANCING
/**
 * folio_use_access_time - check if a folio reuses cpupid for page access time
 * @folio: folio to check
 *
 * The folio's _last_cpupid field is repurposed by memory tiering. In memory
 * tiering mode, the cpupid of a slow memory folio (one that is not in toptier
 * memory) is used to record the page access time.
 *
 * Only built when CONFIG_NUMA_BALANCING is enabled.
 *
 * Return: true if the folio's _last_cpupid is used to record page access time
 */
bool folio_use_access_time(struct folio *folio)
{}
#endif

#ifdef CONFIG_MIGRATION
/*
 * NOTE(review): presumably the abstract-distance threshold used to decide
 * whether a node belongs to the top tier -- confirm against its writers.
 */
static int top_tier_adistance;
/*
 * node_demotion[] examples:
 *
 * Example 1:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 & 3 are PMEM nodes.
 *
 * node distances:
 * node   0    1    2    3
 *    0  10   20   30   40
 *    1  20   10   40   30
 *    2  30   40   10   40
 *    3  40   30   40   10
 *
 * memory_tiers0 = 0-1
 * memory_tiers1 = 2-3
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 3
 * node_demotion[2].preferred = <empty>
 * node_demotion[3].preferred = <empty>
 *
 * Example 2:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 is memory-only DRAM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   30
 *    2  30   30   10
 *
 * memory_tiers0 = 0-2
 *
 * node_demotion[0].preferred = <empty>
 * node_demotion[1].preferred = <empty>
 * node_demotion[2].preferred = <empty>
 *
 * Example 3:
 *
 * Node 0 is a CPU + DRAM node, node 1 is an HBM node, node 2 is a PMEM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   40
 *    2  30   40   10
 *
 * memory_tiers0 = 1
 * memory_tiers1 = 0
 * memory_tiers2 = 2
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 0
 * node_demotion[2].preferred = <empty>
 *
 */
/*
 * Demotion target table, indexed by node id as in the examples above
 * (node_demotion[nid].preferred). Only defined when CONFIG_MIGRATION is set.
 */
static struct demotion_nodes *node_demotion __read_mostly;
#endif /* CONFIG_MIGRATION */

/*
 * Blocking notifier chain head for abstract distance algorithms; see
 * register_mt_adistance_algorithm() for the callback contract.
 */
static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms);

/*
 * This lock protects the `default_dram_perf*` variables below, including
 * the reference node id.
 */
static DEFINE_MUTEX(default_dram_perf_lock);
/* NOTE(review): presumably latched when inconsistent DRAM perf data is seen -- confirm. */
static bool default_dram_perf_error;
static struct access_coordinate default_dram_perf;
/* Node id the default DRAM performance was taken from; the initial value is not shown. */
static int default_dram_perf_ref_nid =;
/* NOTE(review): presumably names the provider of the reference data -- confirm. */
static const char *default_dram_perf_ref_source;

/*
 * Map a struct device pointer to its struct memory_tier.
 * NOTE(review): presumably a container_of() on a device embedded in
 * struct memory_tier -- confirm against the struct layout.
 */
static inline struct memory_tier *to_memory_tier(struct device *device)
{}

/*
 * Return a nodemask for @memtier by value.
 * NOTE(review): presumably the set of nodes belonging to the tier; any
 * locking requirement on the caller is not visible here -- confirm.
 */
static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
{}

/*
 * Device release callback for @dev.
 * NOTE(review): presumably frees the memory_tier that owns @dev -- confirm.
 */
static void memory_tier_device_release(struct device *dev)
{}

/*
 * sysfs show handler backing the read-only "nodelist" device attribute
 * declared with DEVICE_ATTR_RO() below.
 * NOTE(review): presumably formats the tier's nodes into @buf and returns
 * the number of bytes written -- confirm the output format.
 */
static ssize_t nodelist_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{}
static DEVICE_ATTR_RO(nodelist);

/*
 * Attribute tables for memory tier devices; the initializers are not shown.
 * NOTE(review): presumably attrs -> group -> groups, with the "nodelist"
 * attribute as a member -- confirm.
 */
static struct attribute *memtier_dev_attrs[] =;

static const struct attribute_group memtier_dev_group =;

static const struct attribute_group *memtier_dev_groups[] =;

/*
 * Look up, or create, the memory tier for @memtype.
 * NOTE(review): the error convention (NULL vs ERR_PTR) and the required
 * locking are not visible here -- confirm before adding callers.
 */
static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
{}

/*
 * Return the memory tier associated with @node.
 * NOTE(review): the return value for a node without a tier, and the
 * lock/RCU context expected of the caller, are not visible here -- confirm.
 */
static struct memory_tier *__node_get_memory_tier(int node)
{}

#ifdef CONFIG_MIGRATION
/*
 * Report whether @node is a top-tier memory node. Only built when
 * CONFIG_MIGRATION is enabled.
 * NOTE(review): presumably decided by comparing the node's tier against
 * top_tier_adistance -- confirm.
 */
bool node_is_toptier(int node)
{}

/*
 * Output the allowed target nodes for @pgdat through @targets.
 * NOTE(review): presumably the nodes in lower tiers that @pgdat may demote
 * to -- confirm what is written when there are none.
 */
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{}

/**
 * next_demotion_node() - Get the next node in the demotion path
 * @node: The node to look up the next demotion target for
 *
 * Return: node id for next memory node in the demotion path hierarchy
 * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
 * @node online or guarantee that it *continues* to be the next demotion
 * target.
 */
int next_demotion_node(int node)
{}

/*
 * NOTE(review): presumably clears every node's entry in node_demotion[]
 * so that no node has a demotion target -- confirm.
 */
static void disable_all_demotion_targets(void)
{}

/*
 * NOTE(review): presumably a debug helper that logs the current demotion
 * targets of each node -- confirm the log level and format.
 */
static void dump_demotion_targets(void)
{}

/*
 * Find an automatic demotion target for all memory
 * nodes. Failing here is OK.  It might just indicate
 * being at the end of a chain.
 *
 * This is the CONFIG_MIGRATION variant; an empty stub is used otherwise.
 */
static void establish_demotion_targets(void)
{}

#else
/* Without CONFIG_MIGRATION there are no demotion targets to compute. */
static inline void establish_demotion_targets(void) {}
#endif /* CONFIG_MIGRATION */

/*
 * Associate @memtype with @node.
 * NOTE(review): presumably updates node_memory_types[@node] and expects
 * the caller to hold memory_tier_lock -- confirm.
 */
static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype)
{}

/*
 * Place @node into a memory tier and return that tier.
 * NOTE(review): the error convention (NULL vs ERR_PTR) and the locking
 * requirements are not visible here -- confirm.
 */
static struct memory_tier *set_node_memory_tier(int node)
{}

/*
 * Tear down @memtier.
 * NOTE(review): presumably unlinks it from memory_tiers and unregisters
 * its device -- confirm.
 */
static void destroy_memory_tier(struct memory_tier *memtier)
{}

/*
 * Remove @node from its memory tier.
 * NOTE(review): the meaning of the bool result is not visible here;
 * presumably true when the node was actually removed -- confirm.
 */
static bool clear_node_memory_tier(int node)
{}

/*
 * kref release callback.
 * NOTE(review): presumably frees the memory_dev_type embedding @kref --
 * confirm.
 */
static void release_memtype(struct kref *kref)
{}

/*
 * Allocate a memory device type for abstract distance @adistance.
 * NOTE(review): the failure convention (NULL vs ERR_PTR) is not visible
 * here; presumably paired with put_memory_type() -- confirm.
 */
struct memory_dev_type *alloc_memory_type(int adistance)
{}
EXPORT_SYMBOL_GPL();

/*
 * Drop a reference on @memtype.
 * NOTE(review): presumably a kref_put() with release_memtype() as the
 * release function -- confirm.
 */
void put_memory_type(struct memory_dev_type *memtype)
{}
EXPORT_SYMBOL_GPL();

/*
 * Public entry point to associate @memtype with @node.
 * NOTE(review): presumably takes memory_tier_lock around
 * __init_node_memory_type() -- confirm.
 */
void init_node_memory_type(int node, struct memory_dev_type *memtype)
{}
EXPORT_SYMBOL_GPL();

/*
 * Public entry point to undo the association of @memtype with @node.
 * NOTE(review): behaviour when @memtype does not match the node's current
 * type is not visible here -- confirm.
 */
void clear_node_memory_type(int node, struct memory_dev_type *memtype)
{}
EXPORT_SYMBOL_GPL();

/*
 * Find a memory type with abstract distance @adist on the caller-provided
 * list @memory_types, allocating one if none is found.
 * NOTE(review): the failure convention and whether the caller must
 * serialize access to @memory_types are not visible here -- confirm.
 */
struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types)
{}
EXPORT_SYMBOL_GPL();

/*
 * Release the memory types held on the list @memory_types.
 * NOTE(review): presumably unlinks each entry and drops its reference --
 * confirm.
 */
void mt_put_memory_types(struct list_head *memory_types)
{}
EXPORT_SYMBOL_GPL();

/*
 * This is invoked via `late_initcall()` to initialize memory tiers for
 * memory nodes, both with and without CPUs. By this point firmware and
 * devices have been initialized, so adistance algorithms are expected to
 * have been registered.
 */
static int __init memory_tier_late_init(void)
{}
late_initcall(memory_tier_late_init);

/*
 * NOTE(review): presumably logs the fields of @coord, each line prefixed
 * with @prefix -- confirm the log level and format.
 */
static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix)
{}

/*
 * Provide performance data @perf for DRAM node @nid; @source is a string
 * describing the origin of the data.
 * NOTE(review): presumably updates the default_dram_perf* state under
 * default_dram_perf_lock and returns 0 or a negative errno -- confirm.
 */
int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
			     const char *source)
{}

/*
 * Convert performance data @perf into an abstract distance, returned
 * through @adist.
 * NOTE(review): presumably scales against default_dram_perf and returns 0
 * or a negative errno -- confirm.
 */
int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
{}
EXPORT_SYMBOL_GPL();

/**
 * register_mt_adistance_algorithm() - Register memory tiering abstract distance algorithm
 * @nb: The notifier block which describes the algorithm
 *
 * Return: 0 on success, errno on error.
 *
 * Every memory tiering abstract distance algorithm provider needs to
 * register the algorithm with register_mt_adistance_algorithm().  To
 * calculate the abstract distance for a specified memory node, the
 * notifier function will be called unless some higher priority
 * algorithm has already provided the result.  The prototype of the
 * notifier function is as follows,
 *
 *   int (*algorithm_notifier)(struct notifier_block *nb,
 *                             unsigned long nid, void *data);
 *
 * Where "nid" specifies the memory node, "data" is the pointer to the
 * returned abstract distance (that is, "int *adist").  If the
 * algorithm provides the result, NOTIFY_STOP should be returned.
 * Otherwise, return_value & %NOTIFY_STOP_MASK == 0 to allow the next
 * algorithm in the chain to provide the result.
 */
int register_mt_adistance_algorithm(struct notifier_block *nb)
{}
EXPORT_SYMBOL_GPL();

/**
 * unregister_mt_adistance_algorithm() - Unregister memory tiering abstract distance algorithm
 * @nb: The notifier block which describes the algorithm
 *
 * Return: 0 on success, errno on error.
 */
int unregister_mt_adistance_algorithm(struct notifier_block *nb)
{}
EXPORT_SYMBOL_GPL();

/**
 * mt_calc_adistance() - Calculate abstract distance with registered algorithms
 * @node: the node to calculate abstract distance for
 * @adist: the returned abstract distance
 *
 * Return: if return_value & %NOTIFY_STOP_MASK != 0, then some
 * abstract distance algorithm has provided the result, which is
 * returned via @adist.  Otherwise, no algorithm could provide the
 * result and @adist is left unchanged.
 */
int mt_calc_adistance(int node, int *adist)
{}
EXPORT_SYMBOL_GPL();

/*
 * Callback with the notifier signature (@self, @action, opaque @_arg).
 * NOTE(review): presumably handles memory hotplug events by updating the
 * affected node's tier and re-establishing demotion targets; the set of
 * handled @action values and the type behind @_arg are not visible here --
 * confirm.
 */
static int __meminit memtier_hotplug_callback(struct notifier_block *self,
					      unsigned long action, void *_arg)
{}

/*
 * Subsystem-level init, run via subsys_initcall() and therefore before
 * memory_tier_late_init().
 * NOTE(review): presumably registers memory_tier_subsys, sets up the
 * default DRAM type and installs the hotplug callback -- confirm.
 */
static int __init memory_tier_init(void)
{}
subsys_initcall(memory_tier_init);

/*
 * Global (non-static) demotion switch; the initial value is not shown.
 * NOTE(review): presumably exposed through the demotion_enabled sysfs
 * handlers below -- confirm.
 */
bool numa_demotion_enabled =;

#ifdef CONFIG_MIGRATION
#ifdef CONFIG_SYSFS
/*
 * kobject attribute show handler.
 * NOTE(review): presumably prints numa_demotion_enabled into @buf and
 * returns the number of bytes written -- confirm the output format.
 */
static ssize_t demotion_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{}

/*
 * kobject attribute store handler; @buf holds @count bytes of user input.
 * NOTE(review): presumably parses a boolean into numa_demotion_enabled and
 * returns @count on success or a negative errno -- confirm.
 */
static ssize_t demotion_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{}

/*
 * Attribute tables for the NUMA demotion knob; the initializers are not
 * shown.
 * NOTE(review): presumably wires demotion_enabled_show()/_store() into
 * numa_attr_group -- confirm the attribute name and mode.
 */
static struct kobj_attribute numa_demotion_enabled_attr =;

static struct attribute *numa_attrs[] =;

static const struct attribute_group numa_attr_group =;

/*
 * sysfs setup, run via subsys_initcall(); only built when both
 * CONFIG_MIGRATION and CONFIG_SYSFS are enabled.
 * NOTE(review): presumably creates a kobject and attaches numa_attr_group;
 * the parent path and error cleanup are not visible here -- confirm.
 */
static int __init numa_init_sysfs(void)
{}
subsys_initcall(numa_init_sysfs);
#endif /* CONFIG_SYSFS */
#endif