// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/fs.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "../kselftest.h"
#include "vm_util.h"
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define SMAP_FILE_PATH "/proc/self/smaps"
#define MAX_LINE_LENGTH 500
unsigned int __page_size;
unsigned int __page_shift;
uint64_t pagemap_get_entry(int fd, char *start)
{
const unsigned long pfn = (unsigned long)start / getpagesize();
uint64_t entry;
int ret;
ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
if (ret != sizeof(entry))
ksft_exit_fail_msg("reading pagemap failed\n");
return entry;
}
static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
{
struct pm_scan_arg arg;
arg.start = (uintptr_t)start;
arg.end = (uintptr_t)(start + psize());
arg.vec = (uintptr_t)r;
arg.vec_len = 1;
arg.flags = 0;
arg.size = sizeof(struct pm_scan_arg);
arg.max_pages = 0;
arg.category_inverted = 0;
arg.category_mask = 0;
arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
arg.return_mask = arg.category_anyof_mask;
return ioctl(fd, PAGEMAP_SCAN, &arg);
}
static uint64_t pagemap_scan_get_categories(int fd, char *start)
{
struct page_region r;
long ret;
ret = __pagemap_scan_get_categories(fd, start, &r);
if (ret < 0)
ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
if (ret == 0)
return 0;
return r.categories;
}
/* `start` is any valid address. */
static bool pagemap_scan_supported(int fd, char *start)
{
static int supported = -1;
int ret;
if (supported != -1)
return supported;
/* Provide an invalid address in order to trigger EFAULT. */
ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
if (ret == 0)
ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");
supported = errno == EFAULT;
return supported;
}
static bool page_entry_is(int fd, char *start, char *desc,
uint64_t pagemap_flags, uint64_t pagescan_flags)
{
bool m = pagemap_get_entry(fd, start) & pagemap_flags;
if (pagemap_scan_supported(fd, start)) {
bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
if (m == s)
return m;
ksft_exit_fail_msg(
"read and ioctl return unmatched results for %s: %d %d", desc, m, s);
}
return m;
}
bool pagemap_is_softdirty(int fd, char *start)
{
return page_entry_is(fd, start, "soft-dirty",
PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
}
bool pagemap_is_swapped(int fd, char *start)
{
return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
}
bool pagemap_is_populated(int fd, char *start)
{
return page_entry_is(fd, start, "populated",
PM_PRESENT | PM_SWAP,
PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
}
unsigned long pagemap_get_pfn(int fd, char *start)
{
uint64_t entry = pagemap_get_entry(fd, start);
/* If present (63th bit), PFN is at bit 0 -- 54. */
if (entry & PM_PRESENT)
return entry & 0x007fffffffffffffull;
return -1ul;
}
void clear_softdirty(void)
{
int ret;
const char *ctrl = "4";
int fd = open("/proc/self/clear_refs", O_WRONLY);
if (fd < 0)
ksft_exit_fail_msg("opening clear_refs failed\n");
ret = write(fd, ctrl, strlen(ctrl));
close(fd);
if (ret != strlen(ctrl))
ksft_exit_fail_msg("writing clear_refs failed\n");
}
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
while (fgets(buf, len, fp)) {
if (!strncmp(buf, pattern, strlen(pattern)))
return true;
}
return false;
}
uint64_t read_pmd_pagesize(void)
{
int fd;
char buf[20];
ssize_t num_read;
fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
if (fd == -1)
return 0;
num_read = read(fd, buf, 19);
if (num_read < 1) {
close(fd);
return 0;
}
buf[num_read] = '\0';
close(fd);
return strtoul(buf, NULL, 10);
}
bool __check_huge(void *addr, char *pattern, int nr_hpages,
uint64_t hpage_size)
{
uint64_t thp = -1;
int ret;
FILE *fp;
char buffer[MAX_LINE_LENGTH];
char addr_pattern[MAX_LINE_LENGTH];
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
(unsigned long) addr);
if (ret >= MAX_LINE_LENGTH)
ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
fp = fopen(SMAP_FILE_PATH, "r");
if (!fp)
ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
goto err_out;
/*
* Fetch the pattern in the same block and check the number of
* hugepages.
*/
if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
goto err_out;
snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
if (sscanf(buffer, addr_pattern, &thp) != 1)
ksft_exit_fail_msg("Reading smap error\n");
err_out:
fclose(fp);
return thp == (nr_hpages * (hpage_size >> 10));
}
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
{
return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
}
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
{
return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
}
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
{
return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
}
int64_t allocate_transhuge(void *ptr, int pagemap_fd)
{
uint64_t ent[2];
/* drop pmd */
if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_ANONYMOUS |
MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
ksft_exit_fail_msg("mmap transhuge\n");
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
ksft_exit_fail_msg("MADV_HUGEPAGE\n");
/* allocate transparent huge page */
*(volatile void **)ptr = ptr;
if (pread(pagemap_fd, ent, sizeof(ent),
(uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
ksft_exit_fail_msg("read pagemap\n");
if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
!(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
return PAGEMAP_PFN(ent[0]);
return -1;
}
unsigned long default_huge_page_size(void)
{
unsigned long hps = 0;
char *line = NULL;
size_t linelen = 0;
FILE *f = fopen("/proc/meminfo", "r");
if (!f)
return 0;
while (getline(&line, &linelen, f) > 0) {
if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
hps <<= 10;
break;
}
}
free(line);
fclose(f);
return hps;
}
int detect_hugetlb_page_sizes(size_t sizes[], int max)
{
DIR *dir = opendir("/sys/kernel/mm/hugepages/");
int count = 0;
if (!dir)
return 0;
while (count < max) {
struct dirent *entry = readdir(dir);
size_t kb;
if (!entry)
break;
if (entry->d_type != DT_DIR)
continue;
if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
continue;
sizes[count++] = kb * 1024;
ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
kb);
}
closedir(dir);
return count;
}
/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls)
{
struct uffdio_register uffdio_register = { 0 };
uint64_t mode = 0;
int ret = 0;
if (miss)
mode |= UFFDIO_REGISTER_MODE_MISSING;
if (wp)
mode |= UFFDIO_REGISTER_MODE_WP;
if (minor)
mode |= UFFDIO_REGISTER_MODE_MINOR;
uffdio_register.range.start = (unsigned long)addr;
uffdio_register.range.len = len;
uffdio_register.mode = mode;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
ret = -errno;
else if (ioctls)
*ioctls = uffdio_register.ioctls;
return ret;
}
int uffd_register(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor)
{
return uffd_register_with_ioctls(uffd, addr, len,
miss, wp, minor, NULL);
}
int uffd_unregister(int uffd, void *addr, uint64_t len)
{
struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
int ret = 0;
if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
ret = -errno;
return ret;
}
unsigned long get_free_hugepages(void)
{
unsigned long fhp = 0;
char *line = NULL;
size_t linelen = 0;
FILE *f = fopen("/proc/meminfo", "r");
if (!f)
return fhp;
while (getline(&line, &linelen, f) > 0) {
if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
break;
}
free(line);
fclose(f);
return fhp;
}