chromium/ash/components/arc/session/arc_session_runner.cc

// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ash/components/arc/session/arc_session_runner.h"

#include <optional>
#include <utility>

#include "ash/components/arc/arc_util.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/task/task_runner.h"

namespace arc {

namespace {

constexpr base::TimeDelta kDefaultRestartDelay = base::Seconds(5);

void RecordInstanceCrashUma(ArcContainerLifetimeEvent sample) {
  UMA_HISTOGRAM_ENUMERATION("Arc.ContainerLifetimeEvent", sample,
                            ArcContainerLifetimeEvent::COUNT);
  // Log the metric to facilitate finding feedback reports in Xamine.
  VLOG(1) << "Arc.ContainerLifetimeEvent: "
          << static_cast<std::underlying_type_t<ArcContainerLifetimeEvent>>(
                 sample);
}

void RecordInstanceRestartAfterCrashUma(size_t restart_after_crash_count) {
  UMA_HISTOGRAM_COUNTS_100("Arc.ContainerRestartAfterCrashCount",
                           restart_after_crash_count);
}

// Gets an ArcContainerLifetimeEvent value to record. Returns nullopt when no
// UMA recording is needed.
std::optional<ArcContainerLifetimeEvent> GetArcContainerLifetimeEvent(
    size_t restart_after_crash_count,
    ArcStopReason stop_reason,
    bool was_running) {
  // Record UMA only when this is the first non-early crash. This has to be
  // done before checking other conditions. Otherwise, an early crash after
  // container restart might be recorded. Each CONTAINER_STARTED event can
  // be paired up to one non-START event.
  if (restart_after_crash_count)
    return std::nullopt;

  switch (stop_reason) {
    case ArcStopReason::SHUTDOWN:
    case ArcStopReason::LOW_DISK_SPACE:
      // We don't record these events.
      return std::nullopt;
    case ArcStopReason::GENERIC_BOOT_FAILURE:
      return ArcContainerLifetimeEvent::CONTAINER_FAILED_TO_START;
    case ArcStopReason::CRASH:
      return was_running ? ArcContainerLifetimeEvent::CONTAINER_CRASHED
                         : ArcContainerLifetimeEvent::CONTAINER_CRASHED_EARLY;
  }

  NOTREACHED();
}

// Returns true if restart is needed for given conditions.
bool IsRestartNeeded(std::optional<ArcInstanceMode> target_mode,
                     ArcStopReason stop_reason,
                     bool was_running) {
  if (!target_mode.has_value()) {
    // The request to run ARC is canceled by the caller. No need to restart.
    return false;
  }

  switch (stop_reason) {
    case ArcStopReason::SHUTDOWN:
      // This is a part of stop requested by ArcSessionRunner.
      // If ARC is re-requested to start, restart is necessary.
      // This case happens, e.g., RequestStart() -> RequestStop() ->
      // RequestStart(), case. If the second RequestStart() is called before
      // the instance previously running is stopped, then just |target_mode_|
      // is set. On completion, restart is needed.
      return true;
    case ArcStopReason::GENERIC_BOOT_FAILURE:
    case ArcStopReason::LOW_DISK_SPACE:
      // These two are errors on starting. To prevent failure loop, do not
      // restart.
      return false;
    case ArcStopReason::CRASH:
      // ARC instance is crashed unexpectedly, so automatically restart.
      // However, to avoid crash loop, do not restart if it is not successfully
      // started yet. So, check |was_running|.
      return was_running;
  }

  NOTREACHED();
}

// Returns true if the request to start/upgrade ARC instance is allowed
// operation.
bool IsRequestAllowed(const std::optional<ArcInstanceMode>& current_mode,
                      ArcInstanceMode request_mode) {
  if (!current_mode.has_value()) {
    // This is a request to start a new ARC instance (either mini instance
    // or full instance).
    return true;
  }

  if (current_mode == ArcInstanceMode::MINI_INSTANCE &&
      request_mode == ArcInstanceMode::FULL_INSTANCE) {
    // This is a request to upgrade the running mini instance to full instance.
    return true;
  }

  // Otherwise, not allowed.
  LOG(ERROR) << "Unexpected ARC instance mode transition request: "
             << current_mode << " -> " << request_mode;
  return false;
}

}  // namespace

ArcSessionRunner::ArcSessionRunner(const ArcSessionFactory& factory)
    : restart_delay_(kDefaultRestartDelay),
      restart_after_crash_count_(0),
      factory_(factory) {}

ArcSessionRunner::~ArcSessionRunner() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (arc_session_)
    arc_session_->RemoveObserver(this);
}

void ArcSessionRunner::AddObserver(Observer* observer) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  observer_list_.AddObserver(observer);
}

void ArcSessionRunner::RemoveObserver(Observer* observer) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  observer_list_.RemoveObserver(observer);
}

void ArcSessionRunner::ResumeRunner() {
  VLOG(1) << "ArcSessionRunner is resumed";
  resumed_ = true;
  if (target_mode_) {
    ArcInstanceMode original_mode = *target_mode_;
    target_mode_ = std::nullopt;
    RequestStart(original_mode);
  }
}

void ArcSessionRunner::RequestStartMiniInstance() {
  RequestStart(ArcInstanceMode::MINI_INSTANCE);
}

void ArcSessionRunner::RequestUpgrade(UpgradeParams params) {
  upgrade_params_ = std::move(params);
  RequestStart(ArcInstanceMode::FULL_INSTANCE);
}

void ArcSessionRunner::RequestStart(ArcInstanceMode request_mode) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  if (target_mode_ == request_mode) {
    // Consecutive RequestStart() call for the same mode. Do nothing.
    return;
  }

  if (!IsRequestAllowed(target_mode_, request_mode))
    return;

  VLOG(1) << "Session start requested: " << request_mode;
  target_mode_ = request_mode;
  if (arc_session_ && arc_session_->IsStopRequested()) {
    // This is the case where RequestStop() was called, but before
    // |arc_session_| had finshed stopping, RequestStart() is called.
    // Do nothing in the that case, since when |arc_session_| does actually
    // stop, OnSessionStopped() will be called, where it should automatically
    // restart.
    return;
  }

  if (restart_timer_.IsRunning()) {
    // |restart_timer_| may be running if this is upgrade request in a
    // following scenario.
    // - RequestStart(MINI_INSTANCE)
    // - RequestStop()
    // - RequestStart(MINI_INSTANCE)
    // - OnSessionStopped()
    // - RequestStart(FULL_INSTANCE) before RestartArcSession() is called.
    // In such a case, defer the operation to RestartArcSession() called later.
    return;
  }

  if (!resumed_) {
    VLOG(1) << "Deferring to start ARC instance. "
            << "This runner hasn't been resumed yet.";
    return;
  }

  // No asynchronous event is expected later. Trigger the ArcSession now.
  StartArcSession();
}

void ArcSessionRunner::RequestStop() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  VLOG(1) << "Session stop requested";
  target_mode_ = std::nullopt;

  if (arc_session_) {
    // If |arc_session_| is running, stop it.
    // Note that |arc_session_| may be already in the process of stopping or
    // be stopped.
    // E.g. RequestStart() -> RequestStop() -> RequestStart() -> RequestStop()
    // case. If the second RequestStop() is called before the first
    // RequestStop() is not yet completed for the instance, Stop() of the
    // instance is called again, but it is no-op as expected.
    arc_session_->Stop();
  }

  // In case restarting is in progress, cancel it.
  restart_timer_.Stop();
}

void ArcSessionRunner::OnShutdown() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  VLOG(1) << "OnShutdown";
  target_mode_ = std::nullopt;
  restart_timer_.Stop();
  if (arc_session_)
    arc_session_->OnShutdown();
  // ArcSession::OnShutdown() invokes OnSessionStopped() synchronously.
  // In the observer method, |arc_session_| should be destroyed.
  DCHECK(!arc_session_);
}

void ArcSessionRunner::SetUserInfo(
    const cryptohome::Identification& cryptohome_id,
    const std::string& hash,
    const std::string& serial_number) {
  // |cryptohome_id.id()| and |hash| can be empty in unit tests. This function
  // can also be called multiple times in tests.
  // TODO(khmel): Fix tests and add DCHECKs to make sure they are not empty
  // and the function is called only once.
  DCHECK(!IsArcVmEnabled() || !serial_number.empty());
  cryptohome_id_ = cryptohome_id;
  user_id_hash_ = hash;
  serial_number_ = serial_number;
  if (arc_session_)
    arc_session_->SetUserInfo(cryptohome_id_, user_id_hash_, serial_number_);
}

void ArcSessionRunner::SetDemoModeDelegate(
    std::unique_ptr<ArcClientAdapter::DemoModeDelegate> delegate) {
  demo_mode_delegate_ = std::move(delegate);
}

void ArcSessionRunner::TrimVmMemory(TrimVmMemoryCallback callback,
                                    int page_limit) {
  if (arc_session_) {
    arc_session_->TrimVmMemory(std::move(callback), page_limit);
    return;
  }
  LOG(WARNING) << "TrimVmMemory is called when no ARC session is running";
  std::move(callback).Run(/*success=*/false,
                          /*failure_reason=*/"No ARC session is running");
}

void ArcSessionRunner::SetRestartDelayForTesting(
    const base::TimeDelta& restart_delay) {
  DCHECK(!arc_session_);
  DCHECK(!restart_timer_.IsRunning());
  restart_delay_ = restart_delay;
}

void ArcSessionRunner::StartArcSession() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(!restart_timer_.IsRunning());
  DCHECK(target_mode_.has_value());

  VLOG(1) << "Starting ARC instance";
  if (!arc_session_) {
    arc_session_ = factory_.Run();
    if (!cryptohome_id_.id().empty() && !user_id_hash_.empty() &&
        !serial_number_.empty()) {
      arc_session_->SetUserInfo(cryptohome_id_, user_id_hash_, serial_number_);
    }
    arc_session_->SetDefaultDeviceScaleFactor(default_device_scale_factor_);
    arc_session_->SetDemoModeDelegate(demo_mode_delegate_.get());
    arc_session_->SetUseVirtioBlkData(use_virtio_blk_data_);
    arc_session_->SetArcSignedIn(arc_signed_in_);
    arc_session_->AddObserver(this);
    arc_session_->StartMiniInstance();
    // Record the UMA only when |restart_after_crash_count_| is zero to avoid
    // recording an auto-restart-then-crash loop. Such a crash loop is recorded
    // separately with RecordInstanceRestartAfterCrashUma().
    if (!restart_after_crash_count_)
      RecordInstanceCrashUma(ArcContainerLifetimeEvent::CONTAINER_STARTING);
  }
  if (target_mode_ == ArcInstanceMode::FULL_INSTANCE) {
    // Do not std::move the params intentionally. RestartArcSession() can
    // reuse the params without preceded by resetting them.
    arc_session_->RequestUpgrade(upgrade_params_);
  }
}

void ArcSessionRunner::RestartArcSession() {
  VLOG(0) << "Restarting ARC instance";
  // The order is important here. Call StartArcSession(), then notify observers.
  StartArcSession();
  for (auto& observer : observer_list_)
    observer.OnSessionRestarting();
}

void ArcSessionRunner::OnSessionStopped(ArcStopReason stop_reason,
                                        bool was_running,
                                        bool full_requested) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(arc_session_);
  DCHECK(!restart_timer_.IsRunning());

  VLOG(0) << "ARC stopped: " << stop_reason;

  arc_session_->RemoveObserver(this);
  arc_session_.reset();

  const std::optional<ArcContainerLifetimeEvent> uma_to_record =
      GetArcContainerLifetimeEvent(restart_after_crash_count_, stop_reason,
                                   was_running);
  if (uma_to_record.has_value())
    RecordInstanceCrashUma(uma_to_record.value());

  const bool restarting =
      IsRestartNeeded(target_mode_, stop_reason, was_running);

  if (restarting && stop_reason == ArcStopReason::CRASH) {
    ++restart_after_crash_count_;
  } else {
    // The session ended. Record the restart count.
    RecordInstanceRestartAfterCrashUma(restart_after_crash_count_);
    restart_after_crash_count_ = 0;
  }

  if (restarting) {
    // There was a previous invocation and it crashed for some reason. Try
    // starting ARC instance later again.
    // Note that even |restart_delay_| is 0 (for testing), it needs to
    // PostTask, because observer callback may call RequestStart()/Stop().
    VLOG(0) << "ARC restarting";
    restart_timer_.Start(FROM_HERE, restart_delay_,
                         base::BindOnce(&ArcSessionRunner::RestartArcSession,
                                        weak_ptr_factory_.GetWeakPtr()));
  }

  // The observers should be agnostic to the existence of the limited-purpose
  // instance.
  if (full_requested) {
    for (auto& observer : observer_list_)
      observer.OnSessionStopped(stop_reason, restarting);
  }
}

}  // namespace arc