#include "perfetto/ext/base/platform.h"
#include "perfetto/ext/base/watchdog.h"
#if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/timerfd.h>
#include <unistd.h>
#include <algorithm>
#include <cinttypes>
#include <fstream>
#include <thread>
#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/crash_keys.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/utils.h"
namespace perfetto {
namespace base {
namespace {
constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
base::CrashKey g_crash_key_reason("wdog_reason");
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
return number >= divisor && number % divisor == 0;
}
double MeanForArray(const uint64_t array[], size_t size) {
uint64_t total = 0;
for (size_t i = 0; i < size; i++) {
total += array[i];
}
return static_cast<double>(total / size);
}
}
bool ReadProcStat(int fd, ProcStat* out) {
char c[512];
size_t c_pos = 0;
while (c_pos < sizeof(c) - 1) {
ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
if (rd < 0) {
PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
return false;
}
if (rd == 0)
break;
c_pos += static_cast<size_t>(rd);
}
PERFETTO_CHECK(c_pos < sizeof(c));
c[c_pos] = '\0';
if (sscanf(c,
"%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
"%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
&out->utime, &out->stime, &out->rss_pages) != 3) {
PERFETTO_ELOG("Invalid stat format: %s", c);
return false;
}
return true;
}
Watchdog::Watchdog(uint32_t polling_interval_ms)
: polling_interval_ms_(polling_interval_ms) {}
Watchdog::~Watchdog() {
if (!thread_.joinable()) {
PERFETTO_DCHECK(!enabled_);
return;
}
PERFETTO_DCHECK(enabled_);
enabled_ = false;
struct itimerspec ts {};
ts.it_value.tv_sec = 0;
ts.it_value.tv_nsec = 1;
timerfd_settime(*timer_fd_, 0, &ts, nullptr);
thread_.join();
}
Watchdog* Watchdog::GetInstance() {
static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
return watchdog;
}
Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
WatchdogCrashReason crash_reason) {
if (!enabled_.load(std::memory_order_relaxed))
return Watchdog::Timer(this, 0, crash_reason);
return Watchdog::Timer(this, ms, crash_reason);
}
void Watchdog::AddFatalTimer(TimerData timer) {
std::lock_guard<std::mutex> guard(mutex_);
timers_.emplace_back(std::move(timer));
RearmTimerFd_Locked();
}
void Watchdog::RemoveFatalTimer(TimerData timer) {
std::lock_guard<std::mutex> guard(mutex_);
for (auto it = timers_.begin(); it != timers_.end(); it++) {
if (*it == timer) {
timers_.erase(it);
break;
}
}
RearmTimerFd_Locked();
}
void Watchdog::RearmTimerFd_Locked() {
if (!enabled_)
return;
auto it = std::min_element(timers_.begin(), timers_.end());
struct itimerspec ts {};
if (it != timers_.end()) {
ts.it_value = ToPosixTimespec(it->deadline);
}
int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
PERFETTO_DCHECK(res == 0);
}
void Watchdog::Start() {
std::lock_guard<std::mutex> guard(mutex_);
if (thread_.joinable()) {
PERFETTO_DCHECK(enabled_);
} else {
PERFETTO_DCHECK(!enabled_);
#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
timer_fd_.reset(
timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
if (!timer_fd_) {
PERFETTO_PLOG(
"timerfd_create failed, the Perfetto watchdog is not available");
return;
}
enabled_ = true;
RearmTimerFd_Locked();
thread_ = std::thread(&Watchdog::ThreadMain, this);
#endif
}
}
void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
std::lock_guard<std::mutex> guard(mutex_);
PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
memory_window_bytes_.Reset(size);
memory_limit_bytes_ = bytes;
}
void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
std::lock_guard<std::mutex> guard(mutex_);
PERFETTO_CHECK(percentage <= 100);
PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
percentage == 0);
size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
cpu_window_time_ticks_.Reset(size);
cpu_limit_percentage_ = percentage;
}
void Watchdog::ThreadMain() {
g_crash_key_reason.Register();
base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
if (!stat_fd) {
PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
return;
}
PERFETTO_DCHECK(timer_fd_);
constexpr uint8_t kFdCount = 1;
struct pollfd fds[kFdCount]{};
fds[0].fd = *timer_fd_;
fds[0].events = POLLIN;
for (;;) {
platform::BeforeMaybeBlockingSyscall();
auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
platform::AfterMaybeBlockingSyscall();
if (!enabled_)
return;
if (ret < 0) {
if (errno == ENOMEM || errno == EINTR) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
continue;
}
PERFETTO_FATAL("watchdog poll() failed");
}
uint64_t expired = 0;
auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
(res == sizeof(expired) && expired > 0));
const auto now = GetWallTimeMs();
int tid_to_kill = 0;
WatchdogCrashReason crash_reason{};
std::unique_lock<std::mutex> guard(mutex_);
for (const auto& timer : timers_) {
if (now >= timer.deadline) {
tid_to_kill = timer.thread_id;
crash_reason = timer.crash_reason;
break;
}
}
guard.unlock();
if (tid_to_kill)
SerializeLogsAndKillThread(tid_to_kill, crash_reason);
lseek(stat_fd.get(), 0, SEEK_SET);
ProcStat stat;
if (!ReadProcStat(stat_fd.get(), &stat))
continue;
uint64_t cpu_time = stat.utime + stat.stime;
uint64_t rss_bytes =
static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
bool threshold_exceeded = false;
guard.lock();
if (CheckMemory_Locked(rss_bytes) && !IsSyncMemoryTaggingEnabled()) {
threshold_exceeded = true;
crash_reason = WatchdogCrashReason::kMemGuardrail;
} else if (CheckCpu_Locked(cpu_time)) {
threshold_exceeded = true;
crash_reason = WatchdogCrashReason::kCpuGuardrail;
}
guard.unlock();
if (threshold_exceeded)
SerializeLogsAndKillThread(getpid(), crash_reason);
}
}
void Watchdog::SerializeLogsAndKillThread(int tid,
WatchdogCrashReason crash_reason) {
g_crash_key_reason.Set(static_cast<int>(crash_reason));
MaybeSerializeLastLogsForCrashReporting();
if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
abort();
}
if (disable_kill_failsafe_for_testing_)
return;
std::this_thread::sleep_for(std::chrono::seconds(10));
abort();
}
bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
if (memory_limit_bytes_ == 0)
return false;
if (memory_window_bytes_.Push(rss_bytes)) {
if (memory_window_bytes_.Mean() >
static_cast<double>(memory_limit_bytes_)) {
PERFETTO_ELOG(
"Memory watchdog trigger. Memory window of %f bytes is above the "
"%" PRIu64 " bytes limit.",
memory_window_bytes_.Mean(), memory_limit_bytes_);
return true;
}
}
return false;
}
bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
if (cpu_limit_percentage_ == 0)
return false;
if (cpu_window_time_ticks_.Push(cpu_time)) {
uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
cpu_window_time_ticks_.OldestWhenFull();
double window_interval_ticks =
(static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
1000.0) *
static_cast<double>(sysconf(_SC_CLK_TCK));
double percentage = static_cast<double>(difference_ticks) /
static_cast<double>(window_interval_ticks) * 100;
if (percentage > cpu_limit_percentage_) {
PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
"%% CPU limit.",
percentage, cpu_limit_percentage_);
return true;
}
}
return false;
}
uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
}
bool Watchdog::WindowedInterval::Push(uint64_t sample) {
buffer_[position_] = sample;
position_ = (position_ + 1) % size_;
filled_ = filled_ || position_ == 0;
return filled_;
}
double Watchdog::WindowedInterval::Mean() const {
return MeanForArray(buffer_.get(), size_);
}
void Watchdog::WindowedInterval::Clear() {
position_ = 0;
buffer_.reset(new uint64_t[size_]());
}
void Watchdog::WindowedInterval::Reset(size_t new_size) {
position_ = 0;
size_ = new_size;
buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
}
Watchdog::Timer::Timer(Watchdog* watchdog,
uint32_t ms,
WatchdogCrashReason crash_reason)
: watchdog_(watchdog) {
if (!ms)
return;
timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
timer_data_.thread_id = GetThreadId();
timer_data_.crash_reason = crash_reason;
PERFETTO_DCHECK(watchdog_);
watchdog_->AddFatalTimer(timer_data_);
}
Watchdog::Timer::~Timer() {
if (timer_data_.deadline.count())
watchdog_->RemoveFatalTimer(timer_data_);
}
Watchdog::Timer::Timer(Timer&& other) noexcept {
watchdog_ = std::move(other.watchdog_);
other.watchdog_ = nullptr;
timer_data_ = std::move(other.timer_data_);
other.timer_data_ = TimerData();
}
}
}
#endif