/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMZONE_H #define _LINUX_MMZONE_H #ifndef __ASSEMBLY__ #ifndef __GENERATING_BOUNDS_H #include <linux/spinlock.h> #include <linux/list.h> #include <linux/list_nulls.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/numa.h> #include <linux/init.h> #include <linux/seqlock.h> #include <linux/nodemask.h> #include <linux/pageblock-flags.h> #include <linux/page-flags-layout.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/local_lock.h> #include <linux/zswap.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_PAGE_ORDER … #else #define MAX_PAGE_ORDER … #endif #define MAX_ORDER_NR_PAGES … #define IS_MAX_ORDER_ALIGNED(pfn) … #define NR_PAGE_ORDERS … /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should * coalesce naturally under reasonable reclaim pressure and those which * will not. */ #define PAGE_ALLOC_COSTLY_ORDER … enum migratetype { … }; /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA #define is_migrate_cma(migratetype) … #define is_migrate_cma_page(_page) … #define is_migrate_cma_folio(folio, pfn) … #else #define is_migrate_cma … #define is_migrate_cma_page … #define is_migrate_cma_folio … #endif static inline bool is_migrate_movable(int mt) { … } /* * Check whether a migratetype can be merged with another migratetype. * * It is only mergeable when it can fall back to other migratetypes for * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. */ static inline bool migratetype_is_mergeable(int mt) { … } #define for_each_migratetype_order(order, type) … extern int page_group_by_mobility_disabled; #define MIGRATETYPE_MASK … #define get_pageblock_migratetype(page) … #define folio_migratetype(folio) … struct free_area { … }; struct pglist_data; #ifdef CONFIG_NUMA enum numa_stat_item { … }; #else #define NR_VM_NUMA_EVENT_ITEMS … #endif enum zone_stat_item { … }; enum node_stat_item { … }; /* * Returns true if the item should be printed in THPs (/proc/vmstat * currently prints number of anon, file and shmem THPs. But the item * is charged in pages). */ static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) { … } /* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). This defines the API part, the internal representation * might be different. */ static __always_inline bool vmstat_item_in_bytes(int idx) { … } /* * We do arithmetic on the LRU lists in various places in the code, * so it is important to keep the active lists LRU_ACTIVE higher in * the array than the corresponding inactive lists, and to keep * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. 
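 *
 * Illustration only (hypothetical helper, not part of this header; the
 * actual offset values are elided above): this layout lets the target
 * list be computed arithmetically rather than looked up, roughly as
 *
 *	static inline enum lru_list example_lru_index(bool file, bool active)
 *	{
 *		enum lru_list lru = LRU_BASE;
 *
 *		if (file)
 *			lru += LRU_FILE;
 *		if (active)
 *			lru += LRU_ACTIVE;
 *		return lru;
 *	}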
* * This has to be kept in sync with the statistics in zone_stat_item * above and the descriptions in vmstat_text in mm/vmstat.c */ #define LRU_BASE … #define LRU_ACTIVE … #define LRU_FILE … enum lru_list { … }; enum vmscan_throttle_state { … }; #define for_each_lru(lru) … #define for_each_evictable_lru(lru) … static inline bool is_file_lru(enum lru_list lru) { … } static inline bool is_active_lru(enum lru_list lru) { … } #define WORKINGSET_ANON … #define WORKINGSET_FILE … #define ANON_AND_FILE … enum lruvec_flags { … }; #endif /* !__GENERATING_BOUNDS_H */ /* * Evictable pages are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while * a page is on one of lrugen->folios[]. Otherwise it stores 0. * * A page is added to the youngest generation on faulting. The aging needs to * check the accessed bit at least twice before handing this page over to the * eviction. The first check takes care of the accessed bit set on the initial * fault; the second check makes sure this page hasn't been used since then. * This process, AKA second chance, requires a minimum of two generations, * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive * LRU, e.g., /proc/vmstat, these two generations are considered active; the * rest of generations, if they exist, are considered inactive. See * lru_gen_is_active(). * * PG_active is always cleared while a page is on one of lrugen->folios[] so * that the aging needs not to worry about it. And it's set again when a page * considered active is isolated for non-reclaiming purposes, e.g., migration. * See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits * in folio->flags. */ #define MIN_NR_GENS … #define MAX_NR_GENS … /* * Each generation is divided into multiple tiers. A page accessed N times * through file descriptors is in tier order_base_2(N). A page in the first tier * (N=0,1) is marked by PG_referenced unless it was faulted in through page * tables or read ahead. A page in any other tier (N>1) is marked by * PG_referenced and PG_workingset. This implies a minimum of two tiers is * supported without using additional bits in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, * comparisons of refaulted/(evicted+protected) from the first tier and the * rest infer whether pages accessed multiple times through file descriptors * are statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in * folio->flags. 
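 *
 * Illustration only (hypothetical helper, not part of this header): per the
 * rule above, the tier of a page accessed @refs times through file
 * descriptors is simply
 *
 *	static inline unsigned int example_lru_tier(unsigned int refs)
 *	{
 *		return order_base_2(refs);
 *	}
 *
 * with order_base_2() coming from <linux/log2.h>, so the number of tiers
 * needed grows only logarithmically with the access count.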
*/ #define MAX_NR_TIERS … #ifndef __GENERATING_BOUNDS_H struct lruvec; struct page_vma_mapped_walk; #define LRU_GEN_MASK … #define LRU_REFS_MASK … #ifdef CONFIG_LRU_GEN enum { … }; enum { … }; #define MIN_LRU_BATCH … #define MAX_LRU_BATCH … /* whether to keep historical stats from evicted generations */ #ifdef CONFIG_LRU_GEN_STATS #define NR_HIST_GENS … #else #define NR_HIST_GENS … #endif /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are * stored in min_seq[] separately for anon and file types as clean file pages * can be evicted regardless of swap constraints. * * Normally anon and file min_seq are in sync. But if swapping is constrained, * e.g., out of swap space, file min_seq is allowed to advance and leave anon * min_seq behind. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. */ struct lru_gen_folio { … }; enum { … }; /* double-buffering Bloom filters */ #define NR_BLOOM_FILTERS … struct lru_gen_mm_state { … }; struct lru_gen_mm_walk { … }; /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins * to improve scalability. For each bin, the hlist_nulls is virtually divided * into three segments: the head, the tail and the default. * * An onlining memcg is added to the tail of a random bin in the old generation. * The eviction starts at the head of a random bin in the old generation. The * per-node memcg generation counter, whose remainder (mod MEMCG_NR_GENS) indexes * the old generation, is incremented when all its bins become empty. * * There are four operations: * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; * 3. The first attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_TAIL; * 4. The second attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * * Notes: * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing * of their max_seq counters ensures the eventual fairness to all eligible * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). * 2. There are only two valid generations: old (seq) and young (seq+1). * MEMCG_NR_GENS is set to three so that when reading the generation counter * locklessly, a stale value (seq-1) does not wrap around to young.
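 *
 * Illustration only (hypothetical helpers, not part of this header): with
 * @seq being the per-node memcg generation counter described above,
 *
 *	static inline int example_memcg_old_gen(unsigned long seq)
 *	{
 *		return seq % MEMCG_NR_GENS;
 *	}
 *
 *	static inline int example_memcg_young_gen(unsigned long seq)
 *	{
 *		return (seq + 1) % MEMCG_NR_GENS;
 *	}
 *
 * A lockless reader racing with an increment observes at worst seq - 1,
 * which MEMCG_NR_GENS == 3 keeps from aliasing the young generation.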
*/ #define MEMCG_NR_GENS … #define MEMCG_NR_BINS … struct lru_gen_memcg { … }; void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) { } static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { } static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } #endif /* CONFIG_LRU_GEN */ struct lruvec { … }; /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE … /* Isolate unevictable pages */ #define ISOLATE_UNEVICTABLE … /* LRU Isolation modes. */ isolate_mode_t; enum zone_watermarks { … }; /* * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists * are added for THP. One PCP list is used by GFP_MOVABLE, and the other PCP list * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_PCP_THP … #else #define NR_PCP_THP … #endif #define NR_LOWORDER_PCP_LISTS … #define NR_PCP_LISTS … #define min_wmark_pages(z) … #define low_wmark_pages(z) … #define high_wmark_pages(z) … #define wmark_pages(z, i) … /* * Flags used in pcp->flags field. * * PCPF_PREV_FREE_HIGH_ORDER: a high-order page was freed by the previous * page freeing. Used to avoid draining the PCP for a one-off high-order * page freeing. * * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in the PCP before * draining it for consecutive high-order page freeing without intervening * allocation, if the CPU's data cache slice is large enough. This reduces * zone lock contention and keeps cache-hot pages available for reuse. */ #define PCPF_PREV_FREE_HIGH_ORDER … #define PCPF_FREE_HIGH_BATCH … struct per_cpu_pages { … } ____cacheline_aligned_in_smp; struct per_cpu_zonestat { … }; struct per_cpu_nodestat { … }; #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { … }; #ifndef __GENERATING_BOUNDS_H #define ASYNC_AND_SYNC … struct zone { … } ____cacheline_internodealigned_in_smp; enum pgdat_flags { … }; enum zone_flags { … }; static inline unsigned long zone_managed_pages(struct zone *zone) { … } static inline unsigned long zone_cma_pages(struct zone *zone) { … } static inline unsigned long zone_end_pfn(const struct zone *zone) { … } static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) { … } static inline bool zone_is_initialized(struct zone *zone) { … } static inline bool zone_is_empty(struct zone *zone) { … } #ifndef BUILD_VDSO32_64 /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ...
| FLAGS | */ #define SECTIONS_PGOFF … #define NODES_PGOFF … #define ZONES_PGOFF … #define LAST_CPUPID_PGOFF … #define KASAN_TAG_PGOFF … #define LRU_GEN_PGOFF … #define LRU_REFS_PGOFF … /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures * the compiler will optimise away reference to them. */ #define SECTIONS_PGSHIFT … #define NODES_PGSHIFT … #define ZONES_PGSHIFT … #define LAST_CPUPID_PGSHIFT … #define KASAN_TAG_PGSHIFT … /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT … #define ZONEID_PGOFF … #else #define ZONEID_SHIFT … #define ZONEID_PGOFF … #endif #define ZONEID_PGSHIFT … #define ZONES_MASK … #define NODES_MASK … #define SECTIONS_MASK … #define LAST_CPUPID_MASK … #define KASAN_TAG_MASK … #define ZONEID_MASK … static inline enum zone_type page_zonenum(const struct page *page) { … } static inline enum zone_type folio_zonenum(const struct folio *folio) { … } #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { … } /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different * pgmaps. Otherwise getting the pgmap of a given segment is not possible * without scanning the entire segment. This helper returns true either if * both pages are not zone device pages or both pages are zone device pages * with the same pgmap. */ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { … } extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else static inline bool is_zone_device_page(const struct page *page) { return false; } static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { return true; } #endif static inline bool folio_is_zone_device(const struct folio *folio) { … } static inline bool is_zone_movable_page(const struct page *page) { … } static inline bool folio_is_zone_movable(const struct folio *folio) { … } #endif /* * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ static inline bool zone_intersects(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { … } /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the * queues ("queue_length >> 12") during an aging round. */ #define DEF_PRIORITY … /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST … enum { … }; /* * This struct contains information about a zone in a zonelist. It is stored * here to avoid dereferences into large structures and lookups of tables */ struct zoneref { … }; /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are * * zonelist_zone() - Return the struct zone * for an entry in _zonerefs * zonelist_zone_idx() - Return the index of the zone for an entry * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { … }; /* * The array of struct pages for flatmem. 
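 *
 * Illustration only (simplified FLATMEM arithmetic, assuming
 * ARCH_PFN_OFFSET == 0; the kernel's real pfn_to_page() comes from
 * <asm-generic/memory_model.h>): this array is what makes pfn-to-page
 * conversion a plain offset,
 *
 *	static inline struct page *example_pfn_to_page(unsigned long pfn)
 *	{
 *		return mem_map + pfn;
 *	}
 *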
* It must be declared for SPARSEMEM as well because there are configurations * that rely on that. */ extern struct page *mem_map; #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split { … }; #endif #ifdef CONFIG_MEMORY_FAILURE /* * Per NUMA node memory failure handling statistics. */ struct memory_failure_stats { … }; #endif /* * On NUMA machines, each NUMA node has a pg_data_t to describe * its memory layout. On UMA machines there is a single pglist_data which * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ pg_data_t; #define node_present_pages(nid) … #define node_spanned_pages(nid) … #define node_start_pfn(nid) … #define node_end_pfn(nid) … static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { … } #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, enum zone_type highest_zoneidx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags, long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx); /* * Memory initialization context, used to differentiate memory added by * the platform statically or via memory hotplug interface. */ enum meminit_context { … }; extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { … } #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else static inline int local_memory_node(int node_id) { return node_id; }; #endif /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ #define zone_idx(zone) … #ifdef CONFIG_ZONE_DEVICE static inline bool zone_is_zone_device(struct zone *zone) { … } #else static inline bool zone_is_zone_device(struct zone *zone) { return false; } #endif /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). */ static inline bool managed_zone(struct zone *zone) { … } /* Returns true if a zone has memory */ static inline bool populated_zone(struct zone *zone) { … } #ifdef CONFIG_NUMA static inline int zone_to_nid(struct zone *zone) { … } static inline void zone_set_nid(struct zone *zone, int nid) { … } #else static inline int zone_to_nid(struct zone *zone) { return 0; } static inline void zone_set_nid(struct zone *zone, int nid) {} #endif extern int movable_zone; static inline int is_highmem_idx(enum zone_type idx) { … } /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
* @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ static inline int is_highmem(struct zone *zone) { … } #ifdef CONFIG_ZONE_DMA bool has_managed_dma(void); #else static inline bool has_managed_dma(void) { return false; } #endif #ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } #else /* CONFIG_NUMA */ #include <asm/mmzone.h> #endif /* !CONFIG_NUMA */ extern struct pglist_data *first_online_pgdat(void); extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); extern struct zone *next_zone(struct zone *zone); /** * for_each_online_pgdat - helper macro to iterate over all online nodes * @pgdat: pointer to a pg_data_t variable */ #define for_each_online_pgdat(pgdat) … /** * for_each_zone - helper macro to iterate over all memory zones * @zone: pointer to struct zone variable * * The user only needs to declare the zone variable, for_each_zone * fills it in. */ #define for_each_zone(zone) … #define for_each_populated_zone(zone) … static inline struct zone *zonelist_zone(struct zoneref *zoneref) { … } static inline int zonelist_zone_idx(struct zoneref *zoneref) { … } static inline int zonelist_node_idx(struct zoneref *zoneref) { … } struct zoneref *__next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes); /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z: The cursor used as a starting point for the search * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. * * Return: the next zone at or below highest_zoneidx within the allowed * nodemask using a cursor within a zonelist as a starting point */ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes) { … } /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist * @zonelist: The zonelist to search for a suitable zone * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. * * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is * never NULL). This may happen either genuinely, or due to concurrent nodemask * update due to cpuset modification. 
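 *
 * Illustration only (open-coded walk; real callers normally use the
 * for_each_zone_zonelist*() helpers below, and do_something() is a
 * placeholder):
 *
 *	struct zoneref *z;
 *	struct zone *zone;
 *
 *	for (z = first_zones_zonelist(zonelist, highest_zoneidx, nodes),
 *	     zone = zonelist_zone(z);
 *	     zone;
 *	     z = next_zones_zonelist(++z, highest_zoneidx, nodes),
 *	     zone = zonelist_zone(z))
 *		do_something(zone);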
* * Return: Zoneref pointer for the first suitable zone found */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, nodemask_t *nodes) { … } /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone: The current zone in the iterator * @z: The current pointer within zonelist->_zonerefs being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * @nodemask: Nodemask allowed by the allocator * * This iterator iterates through all zones at or below a given zone index and * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) … #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) … /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index * @zone: The current zone in the iterator * @z: The current pointer within zonelist->zones being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * * This iterator iterates through all zones at or below a given zone index. */ #define for_each_zone_zonelist(zone, z, zlist, highidx) … /* Whether the 'nodes' are all movable nodes */ static inline bool movable_only_nodes(nodemask_t *nodes) { … } #ifdef CONFIG_SPARSEMEM #include <asm/sparsemem.h> #endif #ifdef CONFIG_FLATMEM #define pfn_to_nid … #endif #ifdef CONFIG_SPARSEMEM /* * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ #define PA_SECTION_SHIFT … #define PFN_SECTION_SHIFT … #define NR_MEM_SECTIONS … #define PAGES_PER_SECTION … #define PAGE_SECTION_MASK … #define SECTION_BLOCKFLAGS_BITS … #if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) { … } static inline unsigned long section_nr_to_pfn(unsigned long sec) { … } #define SECTION_ALIGN_UP(pfn) … #define SECTION_ALIGN_DOWN(pfn) … #define SUBSECTION_SHIFT … #define SUBSECTION_SIZE … #define PFN_SUBSECTION_SHIFT … #define PAGES_PER_SUBSECTION … #define PAGE_SUBSECTION_MASK … #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else #define SUBSECTIONS_PER_SECTION … #endif #define SUBSECTION_ALIGN_UP(pfn) … #define SUBSECTION_ALIGN_DOWN(pfn) … struct mem_section_usage { … }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); struct page; struct page_ext; struct mem_section { … }; #ifdef CONFIG_SPARSEMEM_EXTREME #define SECTIONS_PER_ROOT … #else #define SECTIONS_PER_ROOT … #endif #define SECTION_NR_TO_ROOT(sec) … #define NR_SECTION_ROOTS … #define SECTION_ROOT_MASK … #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif static inline unsigned long *section_to_usemap(struct mem_section *ms) { … } static inline struct mem_section *__nr_to_section(unsigned long nr) { … } extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store * a little bit of information. The pointer is calculated * as mem_map - section_nr_to_pfn(pnum). The result is * aligned to the minimum alignment of the two values: * 1. All mem_map arrays are page-aligned. * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT * lowest bits.
PFN_SECTION_SHIFT is arch-specific * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the * worst combination is powerpc with 256k pages, * which results in PFN_SECTION_SHIFT equal 6. * To sum it up, at least 6 bits are available on all architectures. * However, we can exceed 6 bits on some other architectures except * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available * with the worst case of 64K pages on arm64) if we make sure the * exceeded bit is not applicable to powerpc. */ enum { … }; #define SECTION_MARKED_PRESENT … #define SECTION_HAS_MEM_MAP … #define SECTION_IS_ONLINE … #define SECTION_IS_EARLY … #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE … #endif #define SECTION_MAP_MASK … #define SECTION_NID_SHIFT … static inline struct page *__section_mem_map_addr(struct mem_section *section) { … } static inline int present_section(struct mem_section *section) { … } static inline int present_section_nr(unsigned long nr) { … } static inline int valid_section(struct mem_section *section) { … } static inline int early_section(struct mem_section *section) { … } static inline int valid_section_nr(unsigned long nr) { … } static inline int online_section(struct mem_section *section) { … } #ifdef CONFIG_ZONE_DEVICE static inline int online_device_section(struct mem_section *section) { … } #else static inline int online_device_section(struct mem_section *section) { return 0; } #endif static inline int online_section_nr(unsigned long nr) { … } #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { … } extern unsigned long __highest_present_section_nr; static inline int subsection_map_index(unsigned long pfn) { … } #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { … } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } #endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN * @pfn: the page frame number to check * * Check if there is a valid memory map entry aka struct page for the @pfn. * Note, that availability of the memory map entry does not imply that * there is actual usable memory at that @pfn. The struct page may * represent a hole or an unusable page frame. * * Return: 1 for PFNs that have memory map entries and 0 otherwise */ static inline int pfn_valid(unsigned long pfn) { … } #endif static inline int pfn_in_present_section(unsigned long pfn) { … } static inline unsigned long next_present_section_nr(unsigned long section_nr) { … } /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate * this restriction. */ #ifdef CONFIG_NUMA #define pfn_to_nid(pfn) … #else #define pfn_to_nid … #endif void sparse_init(void); #else #define sparse_init … #define sparse_index_init … #define pfn_in_present_section … #define subsection_map_init … #endif /* CONFIG_SPARSEMEM */ #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */
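/*
 * Illustration only (hypothetical helper, not part of this header): under
 * CONFIG_SPARSEMEM, resolving whether a pfn has a memory map entry mirrors
 * what the generic pfn_valid() above does, minus the locking and the
 * early-section special case:
 *
 *	static inline bool example_pfn_has_memmap(unsigned long pfn)
 *	{
 *		struct mem_section *ms;
 *
 *		if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 *			return false;
 *		ms = __pfn_to_section(pfn);
 *		return valid_section(ms) && pfn_section_valid(ms, pfn);
 *	}
 */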