// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/kernel.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/resource.h>
#include <linux/close_range.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif
#ifndef F_DUPFD_QUERY
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
#ifndef F_CREATED_QUERY
#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
#endif
/*
 * Issue the raw close_range system call.
 *
 * @fd:     lower bound of the descriptor range
 * @max_fd: upper bound of the descriptor range
 * @flags:  flag bits, passed through to the kernel untouched
 *
 * Returns the value reported by syscall(), narrowed to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return (int)rc;
}
TEST(core_close_range)
{
int i, ret;
int open_fds[101];
for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
int fd;
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
SKIP(return, "Skipping test since /dev/null does not exist");
}
open_fds[i] = fd;
}
EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
if (errno == ENOSYS)
SKIP(return, "close_range() syscall not supported");
}
for (i = 0; i < 100; i++) {
ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
if (ret < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(ret, 0);
}
}
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++)
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
for (i = 51; i <= 100; i++)
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
/* create a couple of gaps */
close(57);
close(78);
close(81);
close(82);
close(84);
close(90);
EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
for (i = 51; i <= 92; i++)
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
for (i = 93; i <= 100; i++)
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
/* test that the kernel caps and still closes all fds */
EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
for (i = 93; i <= 99; i++)
EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
}
/*
 * Runs the close-three-sub-ranges sequence with CLOSE_RANGE_UNSHARE inside a
 * child created with CLONE_FILES.  The child reports the outcome solely
 * through its exit status, which the parent then checks.
 */
TEST(close_range_unshare)
{
	int i, status;
	pid_t pid;
	int open_fds[101];
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};

	i = 0;
	while (i < ARRAY_SIZE(open_fds)) {
		int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);

		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				SKIP(return, "Skipping test since /dev/null does not exist");
		}

		open_fds[i++] = fd;
	}

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (!pid) {
		/* raw descriptor numbers closed by hand to punch holes */
		const int gap_fds[] = { 57, 78, 81, 82, 84, 90 };
		bool still_open;

		/* first range: entries 0..50 must vanish, 51..100 must stay */
		if (sys_close_range(open_fds[0], open_fds[50],
				    CLOSE_RANGE_UNSHARE) != 0)
			exit(EXIT_FAILURE);

		for (i = 0; i <= 100; i++) {
			still_open = fcntl(open_fds[i], F_GETFL) != -1;
			if (still_open != (i > 50))
				exit(EXIT_FAILURE);
		}

		/* create a couple of gaps */
		for (i = 0; i < ARRAY_SIZE(gap_fds); i++)
			close(gap_fds[i]);

		/* second range: entries 51..92 must vanish, 93..100 must stay */
		if (sys_close_range(open_fds[51], open_fds[92],
				    CLOSE_RANGE_UNSHARE) != 0)
			exit(EXIT_FAILURE);

		for (i = 51; i <= 100; i++) {
			still_open = fcntl(open_fds[i], F_GETFL) != -1;
			if (still_open != (i > 92))
				exit(EXIT_FAILURE);
		}

		/* test that the kernel caps and still closes all fds */
		if (sys_close_range(open_fds[93], open_fds[99],
				    CLOSE_RANGE_UNSHARE) != 0)
			exit(EXIT_FAILURE);

		/* entries 93..99 must be gone, entry 100 must survive */
		for (i = 93; i <= 100; i++) {
			still_open = fcntl(open_fds[i], F_GETFL) != -1;
			if (still_open != (i > 99))
				exit(EXIT_FAILURE);
		}

		/* finally a range made of the one remaining descriptor */
		if (sys_close_range(open_fds[100], open_fds[100],
				    CLOSE_RANGE_UNSHARE) != 0)
			exit(EXIT_FAILURE);

		still_open = fcntl(open_fds[100], F_GETFL) != -1;
		exit(still_open ? EXIT_FAILURE : EXIT_SUCCESS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
 * CLOSE_RANGE_UNSHARE with UINT_MAX as the upper bound: a child created with
 * CLONE_FILES closes everything from open_fds[0] upwards and verifies that
 * none of the 101 descriptors answers F_GETFL any more.  The parent checks
 * the child's exit status.
 */
TEST(close_range_unshare_capped)
{
	int i, status;
	pid_t pid;
	int open_fds[101];
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};

	i = 0;
	while (i < ARRAY_SIZE(open_fds)) {
		int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);

		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				SKIP(return, "Skipping test since /dev/null does not exist");
		}

		open_fds[i++] = fd;
	}

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (!pid) {
		int outcome = EXIT_SUCCESS;

		if (sys_close_range(open_fds[0], UINT_MAX,
				    CLOSE_RANGE_UNSHARE) != 0)
			exit(EXIT_FAILURE);

		/* stop probing at the first descriptor that is still open */
		for (i = 0; i <= 100 && outcome == EXIT_SUCCESS; i++) {
			if (fcntl(open_fds[i], F_GETFL) != -1)
				outcome = EXIT_FAILURE;
		}

		exit(outcome);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
 * CLOSE_RANGE_CLOEXEC: instead of closing descriptors, the call must set
 * FD_CLOEXEC on every descriptor in the range and leave the others alone.
 * The checks run with RLIMIT_NOFILE lowered to 25, i.e. below the number of
 * descriptors that are already open.
 */
TEST(close_range_cloexec)
{
	int i, ret;
	int open_fds[101];
	struct rlimit rlimit;

	/* no O_CLOEXEC here: the flag must come from close_range() alone */
	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
		int fd;

		fd = open("/dev/null", O_RDONLY);
		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				SKIP(return, "Skipping test since /dev/null does not exist");
		}

		open_fds[i] = fd;
	}

	/* feature probe on a descriptor number well above the opened ones */
	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "close_range() syscall not supported");
		if (errno == EINVAL)
			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
	}

	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
	rlimit.rlim_cur = 25;
	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));

	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
	ASSERT_EQ(0, ret);
	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
	ASSERT_EQ(0, ret);

	for (i = 0; i <= 50; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/* the descriptors between the two ranges must be untouched */
	for (i = 51; i <= 74; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, 0);
	}

	for (i = 75; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/*
	 * Test a common pattern.  The result of the call itself is checked
	 * as well, so a failing syscall is reported directly instead of only
	 * showing up as missing FD_CLOEXEC bits below.
	 */
	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
	EXPECT_EQ(0, ret);
	for (i = 0; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}
}
/*
 * Same checks as the plain CLOSE_RANGE_CLOEXEC test, but every call that
 * sets the bits also passes CLOSE_RANGE_UNSHARE.  FD_CLOEXEC must end up
 * set on exactly the requested ranges, again with RLIMIT_NOFILE lowered
 * to 25.
 */
TEST(close_range_cloexec_unshare)
{
	int i, ret;
	int open_fds[101];
	struct rlimit rlimit;

	/* no O_CLOEXEC here: the flag must come from close_range() alone */
	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
		int fd;

		fd = open("/dev/null", O_RDONLY);
		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				SKIP(return, "Skipping test since /dev/null does not exist");
		}

		open_fds[i] = fd;
	}

	/* feature probe on a descriptor number well above the opened ones */
	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
	if (ret < 0) {
		if (errno == ENOSYS)
			SKIP(return, "close_range() syscall not supported");
		if (errno == EINVAL)
			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
	}

	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
	rlimit.rlim_cur = 25;
	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));

	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
	ret = sys_close_range(open_fds[0], open_fds[50],
			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
	ASSERT_EQ(0, ret);
	ret = sys_close_range(open_fds[75], open_fds[100],
			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
	ASSERT_EQ(0, ret);

	for (i = 0; i <= 50; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/* the descriptors between the two ranges must be untouched */
	for (i = 51; i <= 74; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, 0);
	}

	for (i = 75; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}

	/*
	 * Test a common pattern.  The result of the call itself is checked
	 * as well, so a failing syscall is reported directly instead of only
	 * showing up as missing FD_CLOEXEC bits below.
	 */
	ret = sys_close_range(3, UINT_MAX,
			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
	EXPECT_EQ(0, ret);
	for (i = 0; i <= 100; i++) {
		int flags = fcntl(open_fds[i], F_GETFD);

		EXPECT_GT(flags, -1);
		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
	}
}
/*
 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
 */
TEST(close_range_cloexec_syzbot)
{
int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
/* Create a huge gap in the fd table. */
fd1 = open("/dev/null", O_RDWR);
EXPECT_GT(fd1, 0);
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
if (pid == 0) {
ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
if (ret)
exit(EXIT_FAILURE);
/*
* We now have a private file descriptor table and all
* our open fds should still be open but made
* close-on-exec.
*/
flags = fcntl(fd1, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
flags = fcntl(fd2, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
*/
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
exit(EXIT_SUCCESS);
}
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status));
/*
* We had a shared file descriptor table before along with requesting
* close-on-exec so the original fds must not be close-on-exec.
*/
flags = fcntl(fd1, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
flags = fcntl(fd2, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 1);
}
fd4 = open("/dev/null", O_RDWR);
EXPECT_GT(fd4, 0);
/* Same inode, different file pointers. */
flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
if (flags < 0) {
EXPECT_EQ(errno, EINVAL);
} else {
EXPECT_EQ(flags, 0);
}
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
EXPECT_EQ(close(fd4), 0);
}
/*
 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
 */
TEST(close_range_cloexec_unshare_syzbot)
{
int i, fd1, fd2, fd3, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
/*
* Create a huge gap in the fd table. When we now call
* CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
* bound the kernel will only copy up to fd1 file descriptors into the
* new fd table. If the kernel is buggy and doesn't handle
* CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
* descriptors and we will oops!
*
* On a buggy kernel this should immediately oops. But let's loop just
* to be sure.
*/
fd1 = open("/dev/null", O_RDWR);
EXPECT_GT(fd1, 0);
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
for (i = 0; i < 100; i++) {
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
if (pid == 0) {
ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
CLOSE_RANGE_CLOEXEC);
if (ret)
exit(EXIT_FAILURE);
/*
* We now have a private file descriptor table and all
* our open fds should still be open but made
* close-on-exec.
*/
flags = fcntl(fd1, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
flags = fcntl(fd2, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
*/
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
exit(EXIT_SUCCESS);
}
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
* We created a private file descriptor table before along with
* requesting close-on-exec so the original fds must not be
* close-on-exec.
*/
flags = fcntl(fd1, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
flags = fcntl(fd2, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
}
/*
 * Fills descriptors 2..127, lets a CLONE_FILES child cut its table back to
 * the first 64 entries with CLOSE_RANGE_UNSHARE, and checks that the next
 * dup() in the child hands out descriptor 64 again.
 */
TEST(close_range_bitmap_corruption)
{
	struct __clone_args args = {
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};
	int status;
	pid_t pid;
	int i;

	/* get the first 128 descriptors open */
	for (i = 2; i < 128; i++)
		EXPECT_GE(dup2(0, i), 0);

	/* get descriptor table shared */
	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (!pid) {
		/* unshare and truncate descriptor table down to 64 */
		if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE) != 0) {
			exit(EXIT_FAILURE);
		}

		/* descriptor 64 itself must be closed now */
		ASSERT_EQ(fcntl(64, F_GETFD), -1);

		/*
		 * ... and verify that the range 64..127 is not
		 * stuck "fully used" according to secondary bitmap
		 */
		EXPECT_EQ(dup(0), 64)
			exit(EXIT_FAILURE);

		exit(EXIT_SUCCESS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
 * F_CREATED_QUERY must answer 1 only for a descriptor whose open() call
 * created the file: 0 for the pre-existing /dev/null, 1 for a file just
 * created with O_CREAT, and 0 again when that file is reopened.  Repeated
 * for 101 scratch files named "aaaa_<n>", each removed afterwards.
 */
TEST(fcntl_created)
{
	char path[PATH_MAX];
	int fd;
	int i;

	for (i = 0; i != 101; i++) {
		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
		ASSERT_GE(fd, 0) {
			if (errno == ENOENT)
				SKIP(return,
					   "Skipping test since /dev/null does not exist");
		}

		/* "/dev/null" existed already, so this open created nothing. */
		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
		close(fd);

		/* Scratch file in the current working directory. */
		sprintf(path, "aaaa_%d", i);
		fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
		ASSERT_GE(fd, 0);

		/* This open() brought the scratch file into existence. */
		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
		close(fd);

		fd = open(path, O_RDONLY | O_CLOEXEC);
		ASSERT_GE(fd, 0);

		/* Reopening an existing file must not report a creation. */
		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
		close(fd);

		unlink(path);
	}
}
TEST_HARNESS_MAIN