// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_owner.h>
#include <linux/sched/isolation.h>

#include "internal.h"

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{}
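
/*
 * A minimal sketch of what the zeroing above involves (hypothetical
 * helper, illustrative only, not the implementation): clear the zone's
 * atomic NUMA event counters and every CPU's pending per-cpu deltas.
 */
static void __maybe_unused zero_zone_numa_counters_sketch(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_event[item], 0);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->per_cpu_zonestats, cpu)
					->vm_numa_event[item] = 0;
	}
}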

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{}

static void invalid_numa_statistics(void)
{}

static DEFINE_MUTEX(vm_numa_stat_lock);

int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{}
EXPORT_SYMBOL_GPL(all_vm_events);
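
/*
 * A minimal sketch of the accumulation loop (hypothetical helper,
 * illustrative only; called with CPU hotplug excluded): zero the result
 * array, then add in every online CPU's per-cpu event counts.
 */
static void __maybe_unused sum_vm_events_sketch(unsigned long *ret)
{
	int cpu, i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}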

/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{}
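
/*
 * Sketch of the fold (hypothetical helper, illustrative only): credit
 * the foreign CPU's event counts to the current CPU and zero the
 * source, so the global sums stay constant.
 */
static void __maybe_unused vm_events_fold_cpu_sketch(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}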

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_NUMA
static void fold_vm_zone_numa_events(struct zone *zone)
{}

void fold_vm_numa_events(void)
{}
#endif

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{}

int calculate_normal_threshold(struct zone *zone)
{}
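
/*
 * Sketch of the usual sizing heuristic (assumed from the stock kernel,
 * hypothetical helper for illustration): the threshold grows with the
 * log of the CPU count and the log of the zone size in 128MB units,
 * and is capped so the s8 per-cpu deltas retain headroom.
 */
static int __maybe_unused calculate_normal_threshold_sketch(struct zone *zone)
{
	int mem;	/* zone memory in 128MB units */
	int threshold;

	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/* Keep the per-cpu deltas representable in an s8 */
	return min(125, threshold);
}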

/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{}
EXPORT_SYMBOL(__mod_zone_page_state);

void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{}
EXPORT_SYMBOL(__mod_node_page_state);
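
/*
 * Sketch of the per-cpu delta pattern used by the __mod_* helpers above
 * (hypothetical helper, illustrative only): accumulate small deltas in
 * a per-cpu s8 and spill into the zone's atomic counter only when the
 * per-cpu threshold is crossed.
 */
static void __maybe_unused mod_zone_page_state_sketch(struct zone *zone,
				enum zone_stat_item item, long delta)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x, t;

	x = delta + __this_cpu_read(*p);
	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		/* Fold the accumulated delta into the zone counter */
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}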

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{}
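
/*
 * Sketch of the increment fast path (hypothetical helper, illustrative
 * only): bump the per-cpu delta and, when it crosses the threshold,
 * overstep by half a threshold so the counter does not have to fold
 * again immediately on the next increment.
 */
static void __maybe_unused inc_zone_state_sketch(struct zone *zone,
						 enum zone_stat_item item)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}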

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *     0       No overstepping
 *     1       Overstepping half of threshold
 *     -1      Overstepping minus half of threshold
 */
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{}
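
/*
 * Sketch of the lockless update loop (assumed shape, hypothetical
 * helper; the function above may differ in detail): recompute the new
 * per-cpu delta and retry with this_cpu_cmpxchg() until no interrupt
 * has modified it underneath us, then add any overflow to the zone
 * counter outside the loop.
 */
static void __maybe_unused mod_zone_state_sketch(struct zone *zone,
		enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;	/* overflow to zone counters */
		t = this_cpu_read(pcp->stat_threshold);
		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}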

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, int delta, int overstep_mode)
{}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
static int fold_diff(int *zone_diff, int *node_diff)
{}
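
/*
 * Sketch of the fold (hypothetical helper, illustrative only): any
 * non-zero accumulated delta is pushed into the corresponding global
 * atomic counter, and the number of counters touched is reported back.
 */
static int __maybe_unused fold_diff_sketch(int *zone_diff, int *node_diff)
{
	int i, changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}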

/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These updates can cause remote node cache
 * line bouncing and are therefore only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{}
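
/*
 * Condensed sketch of the zone half of the refresh (hypothetical
 * helper, illustrative only; the real function above also handles node
 * stats, NUMA events and pageset draining): atomically steal this CPU's
 * deltas with this_cpu_xchg(), push them into the zone, and fold the
 * global part last so remote cachelines are touched at most once.
 */
static int __maybe_unused refresh_cpu_vm_stats_sketch(void)
{
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	struct zone *zone;
	int i;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);

			if (v) {
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}
		}
	}
	return fold_diff(global_zone_diff, global_node_diff);
}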

/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{}

/*
 * this is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{}
#endif

#ifdef CONFIG_NUMA
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{}
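
/*
 * Sketch (hypothetical helper, illustrative only): the per-node value
 * is just the sum of the item over all of the node's zones, using the
 * cheap zone_page_state() reader.
 */
static unsigned long __maybe_unused sum_zone_node_page_state_sketch(int node,
						enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	unsigned long count = 0;
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}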

/* Determine the per node value of a numa stat item. */
unsigned long sum_zone_numa_event_state(int node,
				 enum numa_stat_item item)
{}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state_pages(struct pglist_data *pgdat,
				    enum node_stat_item item)
{}

unsigned long node_page_state(struct pglist_data *pgdat,
			      enum node_stat_item item)
{}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * figured out from userspace
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{}
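
/*
 * Sketch of the scan (hypothetical helper, illustrative only): walk the
 * free lists once, counting blocks, base pages, and how many blocks of
 * at least suitable_order exist. Each larger block counts as
 * 2^(order - suitable_order) suitable blocks. NR_PAGE_ORDERS is the
 * loop bound on recent kernels.
 */
static void __maybe_unused fill_contig_page_info_sketch(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < NR_PAGE_ORDERS; order++) {
		unsigned long blocks;

		/* nr_free is read racily; the result is only an estimate */
		blocks = data_race(zone->free_area[order].nr_free);
		info->free_blocks_total += blocks;
		info->free_pages += blocks << order;

		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}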

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{}
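
/*
 * Sketch of the index computation (assumed from the stock formula,
 * hypothetical helper for illustration), scaled to 3 decimal places:
 *
 *   index = 1000 - (1000 + 1000 * free_pages / requested)
 *                                             / free_blocks_total
 *
 * Values near 0 mean the failure is due to lack of memory, values near
 * 1000 mean external fragmentation; -1000 flags "request would fit".
 */
static int __maybe_unused fragmentation_index_sketch(unsigned int order,
					struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* The index only makes sense when the request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	return 1000 - div_u64((1000 +
			div_u64(info->free_pages * 1000ULL, requested)),
			info->free_blocks_total);
}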

/*
 * Calculates external fragmentation within a zone wrt the given order.
 * It is defined as the percentage of pages found in blocks of size
 * less than 1 << order. It returns values in range [0, 100].
 */
unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
{}
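
/*
 * Sketch (assumed formula, hypothetical helper for illustration): the
 * pages that sit in blocks too small for the request, expressed as a
 * percentage of all free pages.
 */
static unsigned int __maybe_unused extfrag_for_order_sketch(struct zone *zone,
							    unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	if (info.free_pages == 0)
		return 0;

	return div_u64((info.free_pages -
			(info.free_blocks_suitable << order)) * 100,
			info.free_pages);
}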

/* Same as __fragmentation_index() but allocates contig_page_info on the stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
    defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#ifdef CONFIG_ZONE_DEVICE
#define TEXT_FOR_DEVICE(xx) xx "_device",
#else
#define TEXT_FOR_DEVICE(xx)
#endif

#define TEXTS_FOR_ZONES(xx)	TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
				TEXT_FOR_HIGHMEM(xx) TEXT_FOR_DEVICE(xx)

const char * const vmstat_text[] =;
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
{}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{}

static void frag_stop(struct seq_file *m, void *arg)
{}

/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{}

/* Print out the free pages at each order for each migratetype */
static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
{}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{}

/* Print out the number of pageblocks for each migratetype */
static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{}

/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on
 */
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{}

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * set of all zones and userspace would not be aware of such zones if they are
 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

#define NR_VMSTAT_ITEMS ARRAY_SIZE(vmstat_text)

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{}

static int vmstat_show(struct seq_file *m, void *arg)
{}

static void vmstat_stop(struct seq_file *m, void *arg)
{}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{}

int vmstat_refresh(const struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{}
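
/*
 * Illustrative sketch of the check (hypothetical helper; the real
 * need_update() above also covers node and NUMA deltas): a CPU needs an
 * update iff any of its per-cpu zone diffs is non-zero.
 */
static bool __maybe_unused need_update_sketch(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats =
			per_cpu_ptr(zone->per_cpu_zonestats, cpu);

		/* memchr_inv() finds the first byte that differs from 0 */
		if (memchr_inv(pzstats->vm_stat_diff, 0,
			       sizeof(pzstats->vm_stat_diff)))
			return true;
	}
	return false;
}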

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{}
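
/*
 * Sketch of one shepherd pass (hypothetical helper, illustrative only;
 * the real worker above also respects CPU isolation): requeue the
 * per-cpu vmstat work only on CPUs that actually have pending deltas,
 * then re-arm itself for the next interval.
 */
static void __maybe_unused vmstat_shepherd_sketch(struct work_struct *w)
{
	int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);

		cond_resched();
	}
	cpus_read_unlock();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}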

static void __init start_shepherd_timer(void)
{}

static void __init init_cpu_node_state(void)
{}

static int vmstat_cpu_online(unsigned int cpu)
{}

static int vmstat_cpu_down_prep(unsigned int cpu)
{}

static int vmstat_cpu_dead(unsigned int cpu)
{}

#endif

struct workqueue_struct *mm_percpu_wq;

void __init init_mm_internals(void)
{}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{}
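
/*
 * Sketch of the computation (assumed formula, hypothetical helper for
 * illustration): the fraction of free pages sitting in blocks too small
 * for the request, scaled to 3 decimal places.
 */
static int __maybe_unused unusable_free_index_sketch(unsigned int order,
					struct contig_page_info *info)
{
	/* No free memory is treated as all free memory being unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * 0    => no fragmentation
	 * 1000 => severe fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL,
			info->free_pages);
}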

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{}

static const struct seq_operations unusable_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

DEFINE_SEQ_ATTRIBUTE(unusable);

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{}

static const struct seq_operations extfrag_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

DEFINE_SEQ_ATTRIBUTE(extfrag);

static int __init extfrag_debug_init(void)
{}

module_init(extfrag_debug_init);

#endif

/*
 * Page metadata size (struct page and page_ext) in pages
 */
static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata;

void __meminit mod_node_early_perpage_metadata(int nid, long delta)
{}

void __meminit store_early_perpage_metadata(void)
{}