// recording_session.ts

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import {
  POWER_SCALE_FACTOR,
  SAMPLE_RATE,
  SAMPLES_PER_SLICE,
} from './audio_constants.js';
import {PlatformHandler} from './platform_handler.js';
import {computed, effect, signal} from './reactive/signal.js';
import {SodaEventTransformer, Transcription} from './soda/soda.js';
import {SodaSession} from './soda/types.js';
import {
  assert,
  assertExhaustive,
  assertExists,
  assertNotReached,
} from './utils/assert.js';
import {AsyncJobInfo, AsyncJobQueue} from './utils/async_job_queue.js';
import {InteriorMutableArray} from './utils/interior_mutable_array.js';
import {Unsubscribe} from './utils/observer_list.js';
import {clamp} from './utils/utils.js';

// Augments the global `DisplayMediaStreamOptions` interface with the optional
// `systemAudio` field so it can be used in a type-safe way.
// NOTE(review): presumably needed because the DOM typings in use don't declare
// this field yet — confirm and drop once they do.
declare global {
  interface DisplayMediaStreamOptions {
    // https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getDisplayMedia#systemaudio
    systemAudio?: 'exclude'|'include';
  }
}

// MIME type given to the `MediaRecorder` and used as the type of the final
// recording blob.
const AUDIO_MIME_TYPE = 'audio/webm;codecs=opus';

// Timeslice, in milliseconds, passed to `MediaRecorder.start()`.
const TIME_SLICE_MS = 100;

// Snapshot of an ongoing recording, as exposed through
// `RecordingSession.progress`.
interface RecordingProgress {
  // Length in seconds.
  length: number;
  // All samples of the power. To conserve space while saving metadata with
  // JSON and since this is used for visualization only, the value will be
  // integer in range [0, 255], scaled from the original value of [0, 1].
  powers: InteriorMutableArray<number>;
  // Transcription of the ongoing recording. null if transcription is never
  // enabled throughout the recording.
  transcription: Transcription|null;
}

/**
 * Requests an audio stream from the microphone identified by `micId`.
 *
 * The device id is passed as an `exact` constraint to `getUserMedia`.
 *
 * @param micId Device id of the microphone to capture from.
 * @return The promise returned by `getUserMedia` for the requested stream.
 */
function getMicrophoneStream(micId: string): Promise<MediaStream> {
  const constraints = {
    audio: {deviceId: {exact: micId}},
  };
  return navigator.mediaDevices.getUserMedia(constraints);
}

// Configuration given to `RecordingSession.create()`.
interface RecordingSessionConfig {
  // Whether the system audio stream obtained from `platformHandler` should
  // also be connected as an input of the recording.
  includeSystemAudio: boolean;
  // Device id of the microphone to record from.
  micId: string;
  // Platform abstraction used for the SODA state / installation / sessions and
  // for the system audio stream.
  platformHandler: PlatformHandler;
  // Forwarded to the `SodaEventTransformer` constructor.
  speakerLabelEnabled: boolean;
}

// Shared audio context. Only set once the audio worklet module has been
// loaded successfully, so a non-null value is always ready to use.
let audioCtxGlobal: AudioContext|null = null;

// In-flight initialization of the shared audio context, so that concurrent
// callers share one initialization instead of racing each other.
let audioCtxInit: Promise<AudioContext>|null = null;

/**
 * Creates a new audio context and loads the audio worklet module into it.
 *
 * The context is published to `audioCtxGlobal` only after the module is
 * loaded. If loading fails, the half-initialized context is closed and the
 * error is propagated to the caller.
 */
async function createAudioContext(): Promise<AudioContext> {
  const ctx = new AudioContext({sampleRate: SAMPLE_RATE});
  try {
    await ctx.audioWorklet.addModule('./static/audio_worklet.js');
  } catch (e) {
    // Release the unusable context; errors from closing are not interesting
    // compared to the original failure.
    void ctx.close().catch(() => {});
    throw e;
  }
  audioCtxGlobal = ctx;
  return ctx;
}

/**
 * Returns the shared audio context, creating it on first use.
 *
 * The returned promise resolves only after the audio worklet module has been
 * loaded. A failed initialization is not cached, so a later call retries.
 *
 * @return The shared, fully initialized audio context.
 */
async function getAudioContext(): Promise<AudioContext> {
  if (audioCtxGlobal !== null) {
    return audioCtxGlobal;
  }
  if (audioCtxInit === null) {
    const pending = createAudioContext();
    audioCtxInit = pending;
    // Forget a failed attempt so that the next call starts from scratch.
    pending.catch(() => {
      if (audioCtxInit === pending) {
        audioCtxInit = null;
      }
    });
  }
  return audioCtxInit;
}

// Bookkeeping for the SODA session that currently receives audio.
interface SodaSessionInfo {
  // The SODA session itself.
  session: SodaSession;
  // Offset of the session start from the start of the recording, in
  // milliseconds. Passed along with each event of this session to the
  // `SodaEventTransformer`.
  startOffsetMs: number;
  // Removes the event subscription made on `session`.
  unsubscribe: Unsubscribe;
}

/**
 * A recording session to retrieve audio input and produce an audio blob output.
 *
 * The microphone source (and optionally the system audio source) is connected
 * both to a destination node that feeds a `MediaRecorder`, and to an audio
 * worklet node whose sample slices are used for the power samples and for the
 * optional SODA transcription.
 */
export class RecordingSession {
  /** Chunks emitted by the media recorder, joined into one blob on finish. */
  private readonly dataChunks: Blob[] = [];

  /** Accumulates SODA events into a `Transcription`. */
  private readonly sodaEventTransformer: SodaEventTransformer;

  /** The SODA session currently receiving audio, or null if there's none. */
  private currentSodaSession: SodaSessionInfo|null = null;

  /** Queue that all SODA session start / stop jobs go through. */
  private readonly sodaEnableQueue = new AsyncJobQueue('keepLatest');

  /** Scaled power of each audio slice received so far. */
  private readonly powers = signal(new InteriorMutableArray<number>([]));

  /** Latest transcription, null if transcription was never enabled. */
  private readonly transcription = signal<Transcription|null>(null);

  /** Number of samples received from the audio processor so far. */
  private processedSamples = 0;

  private readonly mediaRecorder: MediaRecorder;

  private readonly audioProcessor: AudioWorkletNode;

  /** Node that all sources are connected to; its stream is recorded. */
  private readonly combinedInputNode: MediaStreamAudioDestinationNode;

  /** Mic source node, null while the mic is closed (e.g. when paused). */
  private micAudioSourceNode: MediaStreamAudioSourceNode|null = null;

  /** System audio source node, null when not opened. */
  private systemAudioSourceNode: MediaStreamAudioSourceNode|null = null;

  /** Last requested mute state, reapplied whenever the mic is reopened. */
  private micMuted = false;

  /** Current progress of the recording, derived from the signals above. */
  readonly progress = computed<RecordingProgress>(() => {
    const powers = this.powers.value;
    const length = (powers.length * SAMPLES_PER_SLICE) / SAMPLE_RATE;
    return {
      length,
      powers,
      transcription: this.transcription.value,
    };
  });

  /**
   * Sets up the audio graph nodes, the media recorder and their listeners.
   *
   * Private since a ready-to-use audio context is needed; use `create()`.
   */
  private constructor(
    private readonly audioCtx: AudioContext,
    private readonly config: RecordingSessionConfig,
  ) {
    this.sodaEventTransformer = new SodaEventTransformer(
      config.speakerLabelEnabled,
    );
    this.combinedInputNode = audioCtx.createMediaStreamDestination();
    this.audioProcessor = new AudioWorkletNode(audioCtx, 'audio-processor');
    this.mediaRecorder = new MediaRecorder(this.combinedInputNode.stream, {
      mimeType: AUDIO_MIME_TYPE,
    });

    this.mediaRecorder.addEventListener('dataavailable', (e) => {
      this.onDataAvailable(e);
    });
    this.mediaRecorder.addEventListener('error', (e) => {
      this.onError(e);
    });

    // Messages are only delivered after `port.start()` is called in `start()`.
    this.audioProcessor.port.addEventListener(
      'message',
      (ev: MessageEvent<Float32Array>) => {
        const samples = ev.data;
        // Calculates the power of the slice. The value range is [0, 1].
        const power = Math.sqrt(
          samples.map((v) => v * v).reduce((x, y) => x + y, 0) / samples.length,
        );
        const scaledPower = clamp(
          Math.floor(power * POWER_SCALE_FACTOR),
          0,
          POWER_SCALE_FACTOR - 1,
        );
        this.powers.value = this.powers.value.push(scaledPower);
        // Forward the raw samples to the transcription session, if any.
        this.currentSodaSession?.session.addAudio(samples);
        this.processedSamples += samples.length;
      },
    );
  }

  /**
   * Sets the mute state of the mic stream.
   *
   * Note that this doesn't change the state of the system audio stream, as the
   * mute button is intended to only mute the mic stream.
   */
  setMicMuted(muted: boolean): void {
    this.micMuted = muted;
    if (this.micAudioSourceNode !== null) {
      for (const track of this.micAudioSourceNode.mediaStream.getTracks()) {
        track.enabled = !muted;
      }
    }
  }

  /** Connects a source to both the recorder input and the audio processor. */
  private connectSourceNode(node: MediaStreamAudioSourceNode): void {
    node.connect(this.combinedInputNode);
    node.connect(this.audioProcessor);
  }

  /** Opens the mic and connects it, unless it's already open. */
  private async initMicAudioSourceNode(): Promise<void> {
    if (this.micAudioSourceNode !== null) {
      return;
    }

    const micStream = await getMicrophoneStream(this.config.micId);
    this.micAudioSourceNode = this.audioCtx.createMediaStreamSource(micStream);
    this.connectSourceNode(this.micAudioSourceNode);

    // Set the mic muted setting again onto the new mic stream.
    this.setMicMuted(this.micMuted);
  }

  /**
   * Opens the system audio stream and connects it, unless it's already open
   * or system audio is not included by the config.
   */
  private async initSystemAudioSourceNode(): Promise<void> {
    if (this.systemAudioSourceNode !== null) {
      return;
    }

    if (!this.config.includeSystemAudio) {
      return;
    }

    const systemAudioStream =
      await this.config.platformHandler.getSystemAudioMediaStream();
    this.systemAudioSourceNode =
      this.audioCtx.createMediaStreamSource(systemAudioStream);
    this.connectSourceNode(this.systemAudioSourceNode);
  }

  /** Stops all tracks of the source's stream and disconnects the node. */
  private closeAudioSourceNode(node: MediaStreamAudioSourceNode): void {
    for (const track of node.mediaStream.getTracks()) {
      track.stop();
    }
    node.disconnect();
  }

  /** Closes the mic source if it's open. */
  private closeMicAudioSourceNode(): void {
    if (this.micAudioSourceNode !== null) {
      this.closeAudioSourceNode(this.micAudioSourceNode);
      this.micAudioSourceNode = null;
    }
  }

  /** Closes the system audio source if it's open. */
  private closeSystemAudioSourceNode(): void {
    if (this.systemAudioSourceNode !== null) {
      this.closeAudioSourceNode(this.systemAudioSourceNode);
      this.systemAudioSourceNode = null;
    }
  }

  /**
   * Pauses or resumes the recording.
   *
   * The mic is closed while paused and reopened on resume.
   */
  async setPaused(paused: boolean): Promise<void> {
    if (paused) {
      await this.audioCtx.suspend();
      // We still need to explicitly pause the media recorder, otherwise the
      // exported webm will have wrong timestamps.
      this.mediaRecorder.pause();
      // Close the mic when paused, so the "mic in use" indicator would go away.
      this.closeMicAudioSourceNode();
    } else {
      await this.initMicAudioSourceNode();
      this.mediaRecorder.resume();
      await this.audioCtx.resume();
    }
  }

  private onDataAvailable(event: BlobEvent): void {
    // TODO(shik): Save the data to file system while recording.
    this.dataChunks.push(event.data);
  }

  private onError(event: Event): void {
    // TODO(shik): Proper error handling.
    console.error(event);
  }

  /**
   * Resolves once SODA is installed, requesting the installation if needed.
   *
   * Rejects if the SODA state turns into 'error'. Must not be called when
   * SODA is unavailable.
   */
  private async ensureSodaInstalled(): Promise<void> {
    const {platformHandler} = this.config;
    const sodaState = platformHandler.sodaState;
    assert(
      sodaState.value.kind !== 'unavailable',
      `Trying to install SODA when it's unavailable`,
    );
    if (sodaState.value.kind === 'installed') {
      return;
    }
    platformHandler.installSoda();
    await new Promise<void>((resolve, reject) => {
      // Watch the state until it settles to either 'installed' or 'error'.
      effect(({dispose}) => {
        switch (sodaState.value.kind) {
          case 'error':
            dispose();
            reject(new Error('Install SODA failed'));
            break;
          case 'installed':
            dispose();
            resolve();
            break;
          case 'notInstalled':
          case 'installing':
            break;
          case 'unavailable':
            return assertNotReached(
              `Trying to install SODA when it's unavailable`,
            );
          default:
            assertExhaustive(sodaState.value);
        }
      });
    });
  }

  /**
   * Queues a job that starts a new SODA session for transcription.
   *
   * The job does nothing if there's already a running session. If the session
   * fails to start, it's discarded so that a later request can try again.
   *
   * @return Info of the queued job.
   */
  startNewSodaSession(): AsyncJobInfo {
    return this.sodaEnableQueue.push(async () => {
      if (this.currentSodaSession !== null) {
        return;
      }
      if (this.transcription.value === null) {
        this.transcription.value = new Transcription([]);
      }
      await this.ensureSodaInstalled();
      // Abort current running job if there's a new enable/disable request.
      if (this.sodaEnableQueue.hasPendingJob()) {
        return;
      }

      const session = await this.config.platformHandler.newSodaSession();
      const unsubscribe = session.subscribeEvent((ev) => {
        this.sodaEventTransformer.addEvent(
          ev,
          assertExists(this.currentSodaSession).startOffsetMs,
        );
        this.transcription.value = this.sodaEventTransformer.getTranscription();
      });
      const sessionInfo: SodaSessionInfo = {
        session,
        unsubscribe,
        startOffsetMs: (this.processedSamples / SAMPLE_RATE) * 1000,
      };
      this.currentSodaSession = sessionInfo;
      try {
        await session.start();
      } catch (e) {
        // Don't keep a session that never started as the current one,
        // otherwise audio would be fed into it and every later start request
        // would return early.
        unsubscribe();
        if (this.currentSodaSession === sessionInfo) {
          this.currentSodaSession = null;
        }
        throw e;
      }
    });
  }

  /**
   * Queues a job that stops the current SODA session, if any.
   *
   * The session is always unsubscribed and cleared, even when stopping it
   * fails.
   *
   * @return Info of the queued job.
   */
  stopSodaSession(): AsyncJobInfo {
    return this.sodaEnableQueue.push(async () => {
      const current = this.currentSodaSession;
      if (current === null) {
        return;
      }
      try {
        // The session stays current while stopping, so events arriving in the
        // meantime are still handled.
        await current.session.stop();
      } finally {
        current.unsubscribe();
        this.currentSodaSession = null;
      }
    });
  }

  /**
   * Starts the recording session.
   *
   * Note that each recording session is intended to only be started once.
   */
  async start(transcriptionEnabled: boolean): Promise<void> {
    // Suspend the context while initializing the source nodes.
    await this.audioCtx.suspend();

    if (transcriptionEnabled) {
      // If the transcription is enabled from the beginning, await for the soda
      // session to start to avoid having start of audio not transcribed.
      // TODO(pihsun): Should this be happened asynchronously and have the
      // audio buffered?
      await this.startNewSodaSession().result;
    }

    await Promise.all([
      this.initMicAudioSourceNode(),
      this.initSystemAudioSourceNode(),
    ]);

    // Resume the context and start the recorder & audio processor after we've
    // initialized all sources.
    await this.audioCtx.resume();

    this.audioProcessor.port.start();
    this.mediaRecorder.start(TIME_SLICE_MS);
  }

  /**
   * Finishes the recording and returns the recorded audio.
   *
   * The audio sources are always closed, even when stopping fails, so the
   * mic / system audio capture never outlives the session.
   *
   * @return A blob of all the recorded data chunks.
   */
  async finish(): Promise<Blob> {
    // The recorder is inactive if it was never started or has already stopped,
    // in which case there's no 'stop' event to wait for.
    const recorderActive = this.mediaRecorder.state !== 'inactive';
    const stopped = recorderActive ?
      new Promise<void>((resolve) => {
        this.mediaRecorder.addEventListener('stop', () => resolve(), {
          once: true,
        });
      }) :
      Promise.resolve();

    try {
      if (recorderActive) {
        this.mediaRecorder.stop();
      }
      this.audioProcessor.port.close();
      await this.stopSodaSession().result;
      await stopped;
    } finally {
      this.closeMicAudioSourceNode();
      this.closeSystemAudioSourceNode();
    }

    return new Blob(this.dataChunks, {type: AUDIO_MIME_TYPE});
  }

  /**
   * Creates a recording session on top of the shared audio context.
   *
   * @param config Configuration of the session.
   */
  static async create(
    config: RecordingSessionConfig,
  ): Promise<RecordingSession> {
    const audioCtx = await getAudioContext();
    return new RecordingSession(audioCtx, config);
  }
}
// chromium/ash/webui/recorder_app_ui/resources/core/recording_session.ts