system_live_caption_service_browsertest.cc

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ash/accessibility/live_caption/system_live_caption_service.h"

#include <memory>

#include "ash/constants/ash_features.h"
#include "ash/constants/ash_switches.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/run_loop.h"
#include "chrome/browser/accessibility/live_caption/live_caption_controller_factory.h"
#include "chrome/browser/ash/accessibility/live_caption/system_live_caption_service_factory.h"
#include "chrome/browser/ash/login/session/user_session_initializer.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/profiles/profile_manager.h"
#include "chrome/browser/profiles/profile_test_util.h"
#include "chrome/browser/speech/cros_speech_recognition_service_factory.h"
#include "chrome/browser/speech/fake_speech_recognition_service.h"
#include "chrome/browser/speech/fake_speech_recognizer.h"
#include "chrome/browser/speech/speech_recognizer_delegate.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "components/live_caption/caption_bubble_controller.h"
#include "components/live_caption/live_caption_controller.h"
#include "components/live_caption/pref_names.h"
#include "components/soda/constants.h"
#include "components/soda/soda_installer.h"
#include "content/public/test/browser_test.h"
#include "media/audio/audio_system.h"
#include "media/base/audio_parameters.h"
#include "media/base/media_switches.h"
#include "media/mojo/mojom/speech_recognition_service.mojom.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"

using ::testing::_;

namespace ash {

namespace {

// Rate handed to the stub audio parameters in CreateStubAudioSystem().
// NOTE(review): despite the "Ms" suffix this is passed where a sample rate is
// expected, so it is presumably in Hz -- confirm before renaming.
constexpr int kDefaultSampleRateMs = 16000;

// Polling frequency used to derive the stub audio buffer size.
constexpr int kDefaultPollingTimesHz = 10;

// Caption language codes used by the language-selection tests.
constexpr char kDefaultLanguageName[] = "en-US";
constexpr char kAlternativeLanguageName[] = "es-ES";

}  // namespace

// We need to swap out the device audio system for a fake one.
class MockAudioSystem : public media::AudioSystem {
 public:
  MockAudioSystem() = default;
  ~MockAudioSystem() override = default;
  MockAudioSystem(const MockAudioSystem&) = delete;
  MockAudioSystem& operator=(const MockAudioSystem&) = delete;

  // media::AudioSystem overrides:
  MOCK_METHOD2(GetInputStreamParameters,
               void(const std::string& device_id,
                    OnAudioParamsCallback callback));
  MOCK_METHOD2(GetOutputStreamParameters,
               void(const std::string& device_id,
                    OnAudioParamsCallback on_params_cb));
  MOCK_METHOD1(HasInputDevices, void(OnBoolCallback on_has_devices_cb));
  MOCK_METHOD1(HasOutputDevices, void(OnBoolCallback on_has_devices_cb));
  MOCK_METHOD2(GetDeviceDescriptions,
               void(bool for_input,
                    OnDeviceDescriptionsCallback on_descriptions_cp));
  MOCK_METHOD2(GetAssociatedOutputDeviceID,
               void(const std::string& input_device_id,
                    OnDeviceIdCallback on_device_id_cb));
  MOCK_METHOD2(GetInputDeviceInfo,
               void(const std::string& input_device_id,
                    OnInputDeviceInfoCallback on_input_device_info_cb));
};

// Creates and returns a stub audio system that reports a reasonable default for
// audio device parameters.
std::unique_ptr<media::AudioSystem> CreateStubAudioSystem() {
  const media::AudioParameters params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::ChannelLayoutConfig::Stereo(), kDefaultSampleRateMs,
      kDefaultSampleRateMs / (kDefaultPollingTimesHz * 2));

  std::unique_ptr<MockAudioSystem> stub_audio_system =
      std::make_unique<MockAudioSystem>();
  EXPECT_CALL(*stub_audio_system, GetInputStreamParameters(_, _))
      .WillRepeatedly(
          [params](auto, MockAudioSystem::OnAudioParamsCallback cb) {
            std::move(cb).Run(params);
          });

  return stub_audio_system;
}

// Runs the system live caption service backed by a fake audio system and SODA
// installation.
class SystemLiveCaptionServiceTest
    : public InProcessBrowserTest,
      public speech::FakeSpeechRecognitionService::Observer {
 public:
  SystemLiveCaptionServiceTest() {
    scoped_feature_list_.InitWithFeatures(
        /*enabled_features=*/{features::kOnDeviceSpeechRecognition,
                              features::kSystemLiveCaption,
                              media::kLiveCaptionMultiLanguage},
        /*disabled_features=*/{});
  }

  ~SystemLiveCaptionServiceTest() override = default;
  SystemLiveCaptionServiceTest(const SystemLiveCaptionServiceTest&) = delete;
  SystemLiveCaptionServiceTest& operator=(const SystemLiveCaptionServiceTest&) =
      delete;

  void SetUpCommandLine(base::CommandLine* command_line) override {
    command_line->AppendSwitch(switches::kIgnoreUserProfileMappingForTests);
  }

  void SetUpOnMainThread() override {
    InProcessBrowserTest::SetUpOnMainThread();
    primary_profile_ = browser()->profile();

    // Create an additional profile. We will verify that its caption bubble is
    // inactive, since only the primary profile should be processing system
    // audio.
    ProfileManager* profile_manager = g_browser_process->profile_manager();
    const base::FilePath profile_path =
        profile_manager->GenerateNextProfileDirectoryPath();
    secondary_profile_ =
        &profiles::testing::CreateProfileSync(profile_manager, profile_path);
    CHECK(secondary_profile_);

    // Replace our CrosSpeechRecognitionService with a fake one.
    fake_speech_recognition_service_ =
        CrosSpeechRecognitionServiceFactory::GetInstanceForTest()
            ->SetTestingSubclassFactoryAndUse(
                primary_profile_, base::BindOnce([](content::BrowserContext*) {
                  return std::make_unique<
                      speech::FakeSpeechRecognitionService>();
                }));
    fake_speech_recognition_service_->AddObserver(this);

    // Pass in an inert audio system backend.
    SystemLiveCaptionServiceFactory::GetInstance()
        ->GetForProfile(primary_profile_)
        ->set_audio_system_factory_for_testing(
            base::BindRepeating(&CreateStubAudioSystem));

    // Don't actually try to download SODA.
    speech::SodaInstaller::GetInstance()->NeverDownloadSodaForTesting();

    // Use English as our caption language.
    primary_profile_->GetPrefs()->SetString(prefs::kLiveCaptionLanguageCode,
                                            speech::kUsEnglishLocale);
  }

  ::captions::CaptionBubbleController* GetCaptionBubbleController(
      Profile* profile) const {
    return ::captions::LiveCaptionControllerFactory::GetInstance()
        ->GetForProfile(profile)
        ->caption_bubble_controller_for_testing();
  }

  void SetLiveCaptionsPref(bool enabled) {
    primary_profile_->GetPrefs()->SetBoolean(prefs::kLiveCaptionEnabled,
                                             enabled);
    base::RunLoop().RunUntilIdle();
  }

  void SetLanguagePref(const std::string& language) {
    primary_profile_->GetPrefs()->SetString(prefs::kLiveCaptionLanguageCode,
                                            language);
    base::RunLoop().RunUntilIdle();
  }

  // Emit the given text from our fake speech recognition service.
  void EmulateRecognizedSpeech(const std::string& text) {
    ASSERT_TRUE(current_audio_fetcher_);
    current_audio_fetcher_->SendSpeechRecognitionResult(
        media::SpeechRecognitionResult(text, /*is_final=*/false));
    base::RunLoop().RunUntilIdle();
  }

  // Meet the preconditions for live captioning so that our logic-under-test
  // starts executing.
  void StartLiveCaptioning() {
    SetLiveCaptionsPref(/*enabled=*/true);
    speech::SodaInstaller::GetInstance()->NotifySodaInstalledForTesting(
        speech::GetLanguageCode(primary_profile_->GetPrefs()->GetString(
            prefs::kLiveCaptionLanguageCode)));
    speech::SodaInstaller::GetInstance()->NotifySodaInstalledForTesting();
    // Events must propogate, so we wait after install.
    base::RunLoop().RunUntilIdle();
    SystemLiveCaptionServiceFactory::GetInstance()
        ->GetForProfile(primary_profile_)
        ->OnNonChromeOutputStarted();
    base::RunLoop().RunUntilIdle();
  }

  // FakeSpeechRecognitionService::Observer
  void OnRecognizerBound(
      speech::FakeSpeechRecognizer* bound_recognizer) override {
    if (bound_recognizer->recognition_options()->recognizer_client_type ==
        media::mojom::RecognizerClientType::kLiveCaption) {
      current_audio_fetcher_ = bound_recognizer->GetWeakPtr();
    }
  }

  // Unowned.
  raw_ptr<Profile, DanglingUntriaged> primary_profile_;
  raw_ptr<Profile, DanglingUntriaged> secondary_profile_;

  // current_audio_fetcher_ is a speech recognizer fake that is used to assert
  // correct behavior when a session is started by the SystemLiveCaptionService.
  // When a session is started `OnRecognizerBound` is invoked which will
  // populate the current_audio_fetcher_ with the correct audio fetcher.  If
  // this pointer is null then that means that the SystemLiveCapitonService
  // has yet to start a session, so if we want to assert that a session
  // hasn't been started yet thus far in a test we can expect this
  // pointer to be null.
  base::WeakPtr<speech::FakeSpeechRecognizer> current_audio_fetcher_ = nullptr;

  raw_ptr<speech::FakeSpeechRecognitionService, DanglingUntriaged>
      fake_speech_recognition_service_;

  base::test::ScopedFeatureList scoped_feature_list_;
};

// Walks through the preconditions one at a time: no recognizer exists until
// the pref is on and both SODA pieces are installed, audio capture only begins
// once non-Chrome output is reported, and turning the pref off removes the
// recognizer again.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, Triggering) {
  speech::SodaInstaller* const soda_installer =
      speech::SodaInstaller::GetInstance();

  // Nothing is enabled or installed yet, so no recognizer has been bound.
  EXPECT_FALSE(current_audio_fetcher_);

  // Flipping the pref on is not enough while SODA is missing.
  SetLiveCaptionsPref(/*enabled=*/true);
  EXPECT_FALSE(current_audio_fetcher_);

  // The language pack alone is not enough either; the base binary is still
  // outstanding.
  soda_installer->NotifySodaInstalledForTesting(speech::LanguageCode::kEnUs);
  base::RunLoop().RunUntilIdle();
  EXPECT_FALSE(current_audio_fetcher_);

  // With the base binary installed as well, the recognizer exists but stays
  // idle until some audio output is reported.
  soda_installer->NotifySodaInstalledForTesting();
  base::RunLoop().RunUntilIdle();
  ASSERT_TRUE(current_audio_fetcher_);
  EXPECT_FALSE(current_audio_fetcher_->is_capturing_audio());

  // Report running audio output; capture should begin.
  auto* const caption_service =
      SystemLiveCaptionServiceFactory::GetInstance()->GetForProfile(
          primary_profile_);
  caption_service->OnNonChromeOutputStarted();
  base::RunLoop().RunUntilIdle();
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // Disabling live captions should stop audio fetching altogether.
  SetLiveCaptionsPref(/*enabled=*/false);
  EXPECT_FALSE(current_audio_fetcher_);
}

// A failed install of the caption language must keep the feature inactive,
// even though the SODA base binary installed fine.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, SodaError) {
  // Turn the pref on so the service starts watching SODA install status.
  SetLiveCaptionsPref(/*enabled=*/true);

  // Base binary succeeds, en-US language pack fails.
  speech::SodaInstaller* const soda_installer =
      speech::SodaInstaller::GetInstance();
  soda_installer->NotifySodaInstalledForTesting();
  soda_installer->NotifySodaErrorForTesting(speech::LanguageCode::kEnUs);
  base::RunLoop().RunUntilIdle();

  // Without the language pack no recognizer may have been bound.
  EXPECT_FALSE(current_audio_fetcher_);
}

// An install error for a language other than the caption language must be
// ignored: captioning still starts once our own language pack arrives.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, SodaIrrelevantError) {
  speech::SodaInstaller* const soda_installer =
      speech::SodaInstaller::GetInstance();
  auto* const caption_service =
      SystemLiveCaptionServiceFactory::GetInstance()->GetForProfile(
          primary_profile_);

  // Audio output is already running before the feature is switched on.
  caption_service->OnNonChromeOutputStarted();
  // Turn the pref on so the service starts watching SODA install status.
  SetLiveCaptionsPref(/*enabled=*/true);

  // The base binary installs successfully.
  soda_installer->NotifySodaInstalledForTesting();
  base::RunLoop().RunUntilIdle();

  // An unrelated language (fr-FR) fails to install.
  soda_installer->NotifySodaErrorForTesting(speech::LanguageCode::kFrFr);
  base::RunLoop().RunUntilIdle();

  // en-US is still missing, so no audio is processed and no recognizer has
  // been bound yet.
  EXPECT_FALSE(current_audio_fetcher_);

  // Now our own language pack installs.
  soda_installer->NotifySodaInstalledForTesting(speech::LanguageCode::kEnUs);
  base::RunLoop().RunUntilIdle();
  // Re-announce the running output by hand; this test has no fake cras audio
  // system that would do it for us.
  caption_service->OnNonChromeOutputStarted();
  base::RunLoop().RunUntilIdle();

  // The fr-FR failure must not have blocked captioning.
  ASSERT_TRUE(current_audio_fetcher_);
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());
}

// Recognized system audio must show up in the primary profile's caption
// bubble only; the secondary profile must not get a bubble controller at all.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, DispatchToProfile) {
  static constexpr char kCaption[] = "System audio caption";

  StartLiveCaptioning();

  // Push one recognition result through the fake recognizer.
  EmulateRecognizedSpeech(kCaption);
  ASSERT_TRUE(current_audio_fetcher_);
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // The primary profile's bubble is visible, error-free and shows the text.
  auto* const bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_TRUE(bubble->IsWidgetVisibleForTesting());
  EXPECT_FALSE(bubble->IsGenericErrorMessageVisibleForTesting());
  EXPECT_EQ(kCaption, bubble->GetBubbleLabelTextForTesting());

  // No other profile should be displaying the transcription.
  EXPECT_EQ(nullptr, GetCaptionBubbleController(secondary_profile_));
}

// Exercises a start -> stop -> start sequence of non-Chrome audio output and
// checks that recognition results keep reaching the primary profile's bubble
// at every stage.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, StartStopStart) {
  static constexpr char kFirstCaption[] = "System audio caption";
  static constexpr char kCaptionAfterStop[] = " more after stop ";
  static constexpr char kCaptionAfterRestart[] = " and yet more ";

  StartLiveCaptioning();

  // Push one recognition result through the fake recognizer.
  EmulateRecognizedSpeech(kFirstCaption);
  ASSERT_TRUE(current_audio_fetcher_);
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // The primary profile's bubble shows the text. Every result in this test is
  // non-final, so each one replaces the previous text instead of appending.
  auto* const bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_TRUE(bubble->IsWidgetVisibleForTesting());
  EXPECT_FALSE(bubble->IsGenericErrorMessageVisibleForTesting());
  EXPECT_EQ(kFirstCaption, bubble->GetBubbleLabelTextForTesting());

  auto* const caption_service =
      SystemLiveCaptionServiceFactory::GetInstance()->GetForProfile(
          primary_profile_);

  // Stop: a result sent right after the stop notification is still shown.
  caption_service->OnNonChromeOutputStopped();
  EmulateRecognizedSpeech(kCaptionAfterStop);
  EXPECT_EQ(kCaptionAfterStop, bubble->GetBubbleLabelTextForTesting());
  // Let the stop settle.
  base::RunLoop().RunUntilIdle();

  // Start again and send another result.
  caption_service->OnNonChromeOutputStarted();
  EmulateRecognizedSpeech(kCaptionAfterRestart);
  EXPECT_EQ(kCaptionAfterRestart, bubble->GetBubbleLabelTextForTesting());

  // No other profile should be displaying the transcription.
  EXPECT_EQ(nullptr, GetCaptionBubbleController(secondary_profile_));
}

// Closing the bubble UI must end transcription: the next recognition result
// leads to an early-stop request and the recognizer goes away.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, EarlyStopping) {
  StartLiveCaptioning();

  // Some speech arrives, which brings up the bubble UI.
  EmulateRecognizedSpeech("System audio caption");
  auto* const bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);

  // Simulate the bubble being closed.
  bubble->CloseActiveModelForTesting();

  // More speech is detected; the bubble is expected to answer it by asking
  // for an early stop.
  EmulateRecognizedSpeech("More system audio captions");

  // The early stop reached the speech recognition service, so the client has
  // been deleted.
  ASSERT_FALSE(current_audio_fetcher_);
}

// When the recognizer reports that the stream is done, the caption bubble
// must be hidden (its controller still exists).
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, EndOfStream) {
  StartLiveCaptioning();
  ASSERT_TRUE(current_audio_fetcher_);

  // Some speech arrives, which makes the bubble visible.
  EmulateRecognizedSpeech("System audio caption");
  auto* bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_TRUE(bubble->IsWidgetVisibleForTesting());

  // Signal the end of the audio stream.
  current_audio_fetcher_->MarkDone();
  base::RunLoop().RunUntilIdle();

  // Re-fetch the controller: it is still there, but with no more audio the
  // widget must no longer be shown.
  bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_FALSE(bubble->IsWidgetVisibleForTesting());
}

// A recognition error must keep the bubble on screen and switch it to the
// generic error message.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, ServiceError) {
  StartLiveCaptioning();
  ASSERT_TRUE(current_audio_fetcher_);

  // Some speech arrives; the bubble is visible and shows no error yet.
  EmulateRecognizedSpeech("System audio caption");
  auto* bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_TRUE(bubble->IsWidgetVisibleForTesting());
  EXPECT_FALSE(bubble->IsGenericErrorMessageVisibleForTesting());

  // Inject a recognition error through the fake recognizer.
  current_audio_fetcher_->SendSpeechRecognitionError();
  base::RunLoop().RunUntilIdle();

  // Re-fetch the controller: the bubble stays visible and now carries the
  // error text.
  bubble = GetCaptionBubbleController(primary_profile_);
  ASSERT_NE(nullptr, bubble);
  EXPECT_TRUE(bubble->IsWidgetVisibleForTesting());
  EXPECT_TRUE(bubble->IsGenericErrorMessageVisibleForTesting());
}

// Tests that the System Live Caption Service uses the language set by the
// kLiveCaptionLanguageCode preference when it starts a recognition session.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest, UsesCorrectLanguage) {
  SetLanguagePref(kAlternativeLanguageName);
  StartLiveCaptioning();
  ASSERT_TRUE(current_audio_fetcher_);

  // Retrieve the recognition options struct passed to the recognition
  // service. We use this to assert that the correct language was passed.
  const media::mojom::SpeechRecognitionOptions* recognition_options =
      current_audio_fetcher_->recognition_options();

  // Should now be processing system audio.
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // Assert language is correct. The optional is checked before it is read so
  // that a missing language fails the test cleanly instead of crashing it,
  // matching SwitchesLanguageCorrectly.
  ASSERT_NE(recognition_options, nullptr);
  ASSERT_TRUE(recognition_options->language.has_value());
  EXPECT_EQ(std::string(kAlternativeLanguageName),
            recognition_options->language.value());
}

// When the language changes in the middle of a session the service must swap
// the speech recognition client for a new one with the selected language.
// This tests that the session restarts automatically while non-Chrome outputs
// are running.
IN_PROC_BROWSER_TEST_F(SystemLiveCaptionServiceTest,
                       SwitchesLanguageCorrectly) {
  StartLiveCaptioning();
  ASSERT_TRUE(current_audio_fetcher_);

  // Retrieve the recognition options struct passed to the recognition
  // service. We use this to assert that the correct language was passed.
  const media::mojom::SpeechRecognitionOptions* recognition_options =
      current_audio_fetcher_->recognition_options();

  // Should now be processing system audio.
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // Assert language is correct.
  ASSERT_NE(recognition_options, nullptr);
  ASSERT_TRUE(recognition_options->language.has_value());
  EXPECT_EQ(std::string(kDefaultLanguageName),
            recognition_options->language.value());

  // This should restart the recognizer with the correct language.  The
  // language pack will be installed by the live caption controller and then
  // the SODA Installer will notify the SystemLiveCaptionService.
  SetLanguagePref(kAlternativeLanguageName);

  // For this test case we want to switch while output is running so that we
  // restart the session without explicitly calling OnNonChromeOutputStarted.
  SystemLiveCaptionServiceFactory::GetInstance()
      ->GetForProfile(primary_profile_)
      ->set_num_non_chrome_output_streams_for_testing(
          /*num_output_streams=*/1);

  // Until SODA reports the new language pack as installed nothing should
  // happen: the previous recognizer is expected to be gone and no replacement
  // should have been bound yet.
  EXPECT_FALSE(current_audio_fetcher_);

  // Emulate successful SODA installation from LiveCaptionController.
  speech::SodaInstaller::GetInstance()->NotifySodaInstalledForTesting(
      speech::GetLanguageCode(kAlternativeLanguageName));
  base::RunLoop().RunUntilIdle();

  // A new recognizer must have been bound. Assert before dereferencing the
  // weak pointer so that a failed restart fails the test instead of crashing.
  ASSERT_TRUE(current_audio_fetcher_);
  EXPECT_TRUE(current_audio_fetcher_->is_capturing_audio());

  // The old options struct is destroyed when the speech recognition client is
  // reset, so fetch the options again from the new recognizer.
  recognition_options = current_audio_fetcher_->recognition_options();

  ASSERT_NE(recognition_options, nullptr);
  ASSERT_TRUE(recognition_options->language.has_value());
  EXPECT_EQ(std::string(kAlternativeLanguageName),
            recognition_options->language.value());
}

}  // namespace ash
chromium/chrome/browser/ash/accessibility/live_caption/system_live_caption_service_browsertest.cc