// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]>
*/
#define _GNU_SOURCE
#include <dirent.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include "utils.h"
#define MAX_MSG_LENGTH 1024
int config_debug;
/*
* err_msg - print an error message to the stderr
*/
void err_msg(const char *fmt, ...)
{
char message[MAX_MSG_LENGTH];
va_list ap;
va_start(ap, fmt);
vsnprintf(message, sizeof(message), fmt, ap);
va_end(ap);
fprintf(stderr, "%s", message);
}
/*
* debug_msg - print a debug message to stderr if debug is set
*/
void debug_msg(const char *fmt, ...)
{
char message[MAX_MSG_LENGTH];
va_list ap;
if (!config_debug)
return;
va_start(ap, fmt);
vsnprintf(message, sizeof(message), fmt, ap);
va_end(ap);
fprintf(stderr, "%s", message);
}
/*
* get_llong_from_str - get a long long int from a string
*/
long long get_llong_from_str(char *start)
{
long long value;
char *end;
errno = 0;
value = strtoll(start, &end, 10);
if (errno || start == end)
return -1;
return value;
}
/*
* get_duration - fill output with a human readable duration since start_time
*/
void get_duration(time_t start_time, char *output, int output_size)
{
time_t now = time(NULL);
struct tm *tm_info;
time_t duration;
duration = difftime(now, start_time);
tm_info = gmtime(&duration);
snprintf(output, output_size, "%3d %02d:%02d:%02d",
tm_info->tm_yday,
tm_info->tm_hour,
tm_info->tm_min,
tm_info->tm_sec);
}
/*
* parse_cpu_set - parse a cpu_list filling cpu_set_t argument
*
* Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
* filling cpu_set_t argument.
*
* Returns 1 on success, 0 otherwise.
*/
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
const char *p;
int end_cpu;
int nr_cpus;
int cpu;
int i;
CPU_ZERO(set);
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
for (p = cpu_list; *p; ) {
cpu = atoi(p);
if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
goto err;
while (isdigit(*p))
p++;
if (*p == '-') {
p++;
end_cpu = atoi(p);
if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
goto err;
while (isdigit(*p))
p++;
} else
end_cpu = cpu;
if (cpu == end_cpu) {
debug_msg("cpu_set: adding cpu %d\n", cpu);
CPU_SET(cpu, set);
} else {
for (i = cpu; i <= end_cpu; i++) {
debug_msg("cpu_set: adding cpu %d\n", i);
CPU_SET(i, set);
}
}
if (*p == ',')
p++;
}
return 0;
err:
debug_msg("Error parsing the cpu set %s\n", cpu_list);
return 1;
}
/*
* parse_duration - parse duration with s/m/h/d suffix converting it to seconds
*/
long parse_seconds_duration(char *val)
{
char *end;
long t;
t = strtol(val, &end, 10);
if (end) {
switch (*end) {
case 's':
case 'S':
break;
case 'm':
case 'M':
t *= 60;
break;
case 'h':
case 'H':
t *= 60 * 60;
break;
case 'd':
case 'D':
t *= 24 * 60 * 60;
break;
}
}
return t;
}
/*
* parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
*/
long parse_ns_duration(char *val)
{
char *end;
long t;
t = strtol(val, &end, 10);
if (end) {
if (!strncmp(end, "ns", 2)) {
return t;
} else if (!strncmp(end, "us", 2)) {
t *= 1000;
return t;
} else if (!strncmp(end, "ms", 2)) {
t *= 1000 * 1000;
return t;
} else if (!strncmp(end, "s", 1)) {
t *= 1000 * 1000 * 1000;
return t;
}
return -1;
}
return t;
}
/*
* This is a set of helper functions to use SCHED_DEADLINE.
*/
#ifdef __x86_64__
# define __NR_sched_setattr 314
# define __NR_sched_getattr 315
#elif __i386__
# define __NR_sched_setattr 351
# define __NR_sched_getattr 352
#elif __arm__
# define __NR_sched_setattr 380
# define __NR_sched_getattr 381
#elif __aarch64__ || __riscv
# define __NR_sched_setattr 274
# define __NR_sched_getattr 275
#elif __powerpc__
# define __NR_sched_setattr 355
# define __NR_sched_getattr 356
#elif __s390x__
# define __NR_sched_setattr 345
# define __NR_sched_getattr 346
#endif
#define SCHED_DEADLINE 6
static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
unsigned int flags) {
return syscall(__NR_sched_setattr, pid, attr, flags);
}
int __set_sched_attr(int pid, struct sched_attr *attr)
{
int flags = 0;
int retval;
retval = sched_setattr(pid, attr, flags);
if (retval < 0) {
err_msg("Failed to set sched attributes to the pid %d: %s\n",
pid, strerror(errno));
return 1;
}
return 0;
}
/*
* procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
*
* Check if the procfs entry is a directory of a process, and then check if the
* process has a comm with the prefix set in char *comm_prefix. As the
* current users of this function only check for kernel threads, there is no
* need to check for the threads for the process.
*
* Return: True if the proc_entry contains a comm file with comm_prefix*.
* Otherwise returns false.
*/
static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
{
char buffer[MAX_PATH];
int comm_fd, retval;
char *t_name;
if (proc_entry->d_type != DT_DIR)
return 0;
if (*proc_entry->d_name == '.')
return 0;
/* check if the string is a pid */
for (t_name = proc_entry->d_name; t_name; t_name++) {
if (!isdigit(*t_name))
break;
}
if (*t_name != '\0')
return 0;
snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
comm_fd = open(buffer, O_RDONLY);
if (comm_fd < 0)
return 0;
memset(buffer, 0, MAX_PATH);
retval = read(comm_fd, buffer, MAX_PATH);
close(comm_fd);
if (retval <= 0)
return 0;
retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
if (retval)
return 0;
/* comm already have \n */
debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
return 1;
}
/*
* set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
*
* This function uses procfs to list the currently running threads and then set the
* sched_attr *attr to the threads that start with char *comm_prefix. It is
* mainly used to set the priority to the kernel threads created by the
* tracers.
*/
int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
{
struct dirent *proc_entry;
DIR *procfs;
int retval;
if (strlen(comm_prefix) >= MAX_PATH) {
err_msg("Command prefix is too long: %d < strlen(%s)\n",
MAX_PATH, comm_prefix);
return 1;
}
procfs = opendir("/proc");
if (!procfs) {
err_msg("Could not open procfs\n");
return 1;
}
while ((proc_entry = readdir(procfs))) {
retval = procfs_is_workload_pid(comm_prefix, proc_entry);
if (!retval)
continue;
/* procfs_is_workload_pid confirmed it is a pid */
retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
if (retval) {
err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
goto out_err;
}
debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
}
return 0;
out_err:
closedir(procfs);
return 1;
}
#define INVALID_VAL (~0L)
static long get_long_ns_after_colon(char *start)
{
long val = INVALID_VAL;
/* find the ":" */
start = strstr(start, ":");
if (!start)
return -1;
/* skip ":" */
start++;
val = parse_ns_duration(start);
return val;
}
static long get_long_after_colon(char *start)
{
long val = INVALID_VAL;
/* find the ":" */
start = strstr(start, ":");
if (!start)
return -1;
/* skip ":" */
start++;
val = get_llong_from_str(start);
return val;
}
/*
* parse priority in the format:
* SCHED_OTHER:
* o:<prio>
* O:<prio>
* SCHED_RR:
* r:<prio>
* R:<prio>
* SCHED_FIFO:
* f:<prio>
* F:<prio>
* SCHED_DEADLINE:
* d:runtime:period
* D:runtime:period
*/
int parse_prio(char *arg, struct sched_attr *sched_param)
{
long prio;
long runtime;
long period;
memset(sched_param, 0, sizeof(*sched_param));
sched_param->size = sizeof(*sched_param);
switch (arg[0]) {
case 'd':
case 'D':
/* d:runtime:period */
if (strlen(arg) < 4)
return -1;
runtime = get_long_ns_after_colon(arg);
if (runtime == INVALID_VAL)
return -1;
period = get_long_ns_after_colon(&arg[2]);
if (period == INVALID_VAL)
return -1;
if (runtime > period)
return -1;
sched_param->sched_policy = SCHED_DEADLINE;
sched_param->sched_runtime = runtime;
sched_param->sched_deadline = period;
sched_param->sched_period = period;
break;
case 'f':
case 'F':
/* f:prio */
prio = get_long_after_colon(arg);
if (prio == INVALID_VAL)
return -1;
if (prio < sched_get_priority_min(SCHED_FIFO))
return -1;
if (prio > sched_get_priority_max(SCHED_FIFO))
return -1;
sched_param->sched_policy = SCHED_FIFO;
sched_param->sched_priority = prio;
break;
case 'r':
case 'R':
/* r:prio */
prio = get_long_after_colon(arg);
if (prio == INVALID_VAL)
return -1;
if (prio < sched_get_priority_min(SCHED_RR))
return -1;
if (prio > sched_get_priority_max(SCHED_RR))
return -1;
sched_param->sched_policy = SCHED_RR;
sched_param->sched_priority = prio;
break;
case 'o':
case 'O':
/* o:prio */
prio = get_long_after_colon(arg);
if (prio == INVALID_VAL)
return -1;
if (prio < MIN_NICE)
return -1;
if (prio > MAX_NICE)
return -1;
sched_param->sched_policy = SCHED_OTHER;
sched_param->sched_nice = prio;
break;
default:
return -1;
}
return 0;
}
/*
* set_cpu_dma_latency - set the /dev/cpu_dma_latecy
*
* This is used to reduce the exit from idle latency. The value
* will be reset once the file descriptor of /dev/cpu_dma_latecy
* is closed.
*
* Return: the /dev/cpu_dma_latecy file descriptor
*/
int set_cpu_dma_latency(int32_t latency)
{
int retval;
int fd;
fd = open("/dev/cpu_dma_latency", O_RDWR);
if (fd < 0) {
err_msg("Error opening /dev/cpu_dma_latency\n");
return -1;
}
retval = write(fd, &latency, 4);
if (retval < 1) {
err_msg("Error setting /dev/cpu_dma_latency\n");
close(fd);
return -1;
}
debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);
return fd;
}
#define _STR(x) #x
#define STR(x) _STR(x)
/*
* find_mount - find a the mount point of a given fs
*
* Returns 0 if mount is not found, otherwise return 1 and fill mp
* with the mount point.
*/
static const int find_mount(const char *fs, char *mp, int sizeof_mp)
{
char mount_point[MAX_PATH+1];
char type[100];
int found = 0;
FILE *fp;
fp = fopen("/proc/mounts", "r");
if (!fp)
return 0;
while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
if (strcmp(type, fs) == 0) {
found = 1;
break;
}
}
fclose(fp);
if (!found)
return 0;
memset(mp, 0, sizeof_mp);
strncpy(mp, mount_point, sizeof_mp - 1);
debug_msg("Fs %s found at %s\n", fs, mp);
return 1;
}
/*
* get_self_cgroup - get the current thread cgroup path
*
* Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
*
* 0::/user.slice/user-0.slice/session-3.scope'\n'
*
* This function is interested in the content after the second : and before the '\n'.
*
* Returns 1 if a string was found, 0 otherwise.
*/
static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
{
char path[MAX_PATH], *start;
int fd, retval;
snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
fd = open(path, O_RDONLY);
if (fd < 0)
return 0;
retval = read(fd, path, MAX_PATH);
close(fd);
if (retval <= 0)
return 0;
start = path;
start = strstr(start, ":");
if (!start)
return 0;
/* skip ":" */
start++;
start = strstr(start, ":");
if (!start)
return 0;
/* skip ":" */
start++;
if (strlen(start) >= sizeof_self_cg)
return 0;
snprintf(self_cg, sizeof_self_cg, "%s", start);
/* Swap '\n' with '\0' */
start = strstr(self_cg, "\n");
/* there must be '\n' */
if (!start)
return 0;
/* ok, it found a string after the second : and before the \n */
*start = '\0';
return 1;
}
/*
* set_comm_cgroup - Set cgroup to pid_t pid
*
* If cgroup argument is not NULL, the threads will move to the given cgroup.
* Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
*
* Supports cgroup v2.
*
* Returns 1 on success, 0 otherwise.
*/
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
char cgroup_procs[MAX_PATH];
char pid_str[24];
int retval;
int cg_fd;
retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
if (!retval) {
err_msg("Did not find cgroupv2 mount point\n");
return 0;
}
if (!cgroup) {
retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
sizeof(cgroup_path) - strlen(cgroup_path));
if (!retval) {
err_msg("Did not find self cgroup\n");
return 0;
}
} else {
snprintf(&cgroup_path[strlen(cgroup_path)],
sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
}
snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
debug_msg("Using cgroup path at: %s\n", cgroup_procs);
cg_fd = open(cgroup_procs, O_RDWR);
if (cg_fd < 0)
return 0;
snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
retval = write(cg_fd, pid_str, strlen(pid_str));
if (retval < 0)
err_msg("Error setting cgroup attributes for pid:%s - %s\n",
pid_str, strerror(errno));
else
debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
close(cg_fd);
return (retval >= 0);
}
/**
* set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
*
* If cgroup argument is not NULL, the threads will move to the given cgroup.
* Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
*
* Supports cgroup v2.
*
* Returns 1 on success, 0 otherwise.
*/
int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
{
char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
char cgroup_procs[MAX_PATH];
struct dirent *proc_entry;
DIR *procfs;
int retval;
int cg_fd;
if (strlen(comm_prefix) >= MAX_PATH) {
err_msg("Command prefix is too long: %d < strlen(%s)\n",
MAX_PATH, comm_prefix);
return 0;
}
retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
if (!retval) {
err_msg("Did not find cgroupv2 mount point\n");
return 0;
}
if (!cgroup) {
retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
sizeof(cgroup_path) - strlen(cgroup_path));
if (!retval) {
err_msg("Did not find self cgroup\n");
return 0;
}
} else {
snprintf(&cgroup_path[strlen(cgroup_path)],
sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
}
snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
debug_msg("Using cgroup path at: %s\n", cgroup_procs);
cg_fd = open(cgroup_procs, O_RDWR);
if (cg_fd < 0)
return 0;
procfs = opendir("/proc");
if (!procfs) {
err_msg("Could not open procfs\n");
goto out_cg;
}
while ((proc_entry = readdir(procfs))) {
retval = procfs_is_workload_pid(comm_prefix, proc_entry);
if (!retval)
continue;
retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
if (retval < 0) {
err_msg("Error setting cgroup attributes for pid:%s - %s\n",
proc_entry->d_name, strerror(errno));
goto out_procfs;
}
debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
}
closedir(procfs);
close(cg_fd);
return 1;
out_procfs:
closedir(procfs);
out_cg:
close(cg_fd);
return 0;
}
/**
* auto_house_keeping - Automatically move rtla out of measurement threads
*
* Try to move rtla away from the tracer, if possible.
*
* Returns 1 on success, 0 otherwise.
*/
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
cpu_set_t rtla_cpus, house_keeping_cpus;
int retval;
/* first get the CPUs in which rtla can actually run. */
retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
if (retval == -1) {
debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
return 0;
}
/* then check if the existing setup is already good. */
CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
if (!CPU_COUNT(&house_keeping_cpus)) {
debug_msg("rtla and the monitored CPUs do not share CPUs.");
debug_msg("Skipping auto house-keeping\n");
return 1;
}
/* remove the intersection */
CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
/* get only those that rtla can run */
CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);
/* is there any cpu left? */
if (!CPU_COUNT(&house_keeping_cpus)) {
debug_msg("Could not find any CPU for auto house-keeping\n");
return 0;
}
retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
if (retval == -1) {
debug_msg("Could not set affinity for auto house-keeping\n");
return 0;
}
debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");
return 1;
}