chromium/ash/webui/recorder_app_ui/resources/components/audio-waveform.ts

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import {
  classMap,
  css,
  html,
  nothing,
  PropertyDeclarations,
  repeat,
  svg,
} from 'chrome://resources/mwc/lit/index.js';

import {
  POWER_SCALE_FACTOR,
  SAMPLE_RATE,
  SAMPLES_PER_SLICE,
} from '../core/audio_constants.js';
import {i18n} from '../core/i18n.js';
import {ReactiveLitElement} from '../core/reactive/lit.js';
import {computed} from '../core/reactive/signal.js';
import {Transcription} from '../core/soda/soda.js';
import {
  assert,
  assertExists,
  assertInstanceof,
} from '../core/utils/assert.js';
import {InteriorMutableArray} from '../core/utils/interior_mutable_array.js';

import {
  getNumSpeakerClass,
  getSpeakerLabelClass,
  SPEAKER_LABEL_COLORS,
} from './styles/speaker_label.js';

const BAR_WIDTH = 4;
const BAR_GAP = 5;
const BAR_MIN_HEIGHT = 4.5;
const BAR_MAX_HEIGHT = 100;

const SPEAKER_LABEL_LINE_HEIGHT = 128;

// We don't use DOMRect since it's much slower.
interface Rect {
  x: number;
  y: number;
  width: number;
  height: number;
}

// TODO(pihsun): Is there some way to set .viewBox.baseVal?
function toViewBoxString(viewBox: Rect|null): string|typeof nothing {
  if (viewBox === null) {
    return nothing;
  }
  const {x, y, width, height} = viewBox;
  return `${x} ${y} ${width} ${height}`;
}

/*
 * There are multiple different coordinate system for the "timestamp" of the
 * waveform used in this component:
 * (1) Time (in seconds). Each second contains SAMPLE_RATE audio samples.
 * (2) Index of the "bar" in the waveform, starting from 0. Each "bar" is an
 *     aggregate of SAMPLES_PER_SLICE audio samples. So index 0 corresponds to
 *     [0, SAMPLES_PER_SLICE) audio samples, index 1 corresponds to
 *     [SAMPLES_PER_SLICE, 2*SAMPLES_PER_SLICE) audio samples, and so on...
 * (3) The x coordinate that is rendered in the SVG. Time 0 always corresponds
 *     to x = 0, and the viewBox of the whole SVG is set to show around the
 *     current time.
 *
 * `timestampToBarIndex` converts from (1) to (2), `getBarX` converts from
 * (2) to (3), and `xCoordinateToRoughIdx` converts from (3) to (2).
 *
 * Since the whole waveform looks better when things are aligned to bar, most
 * variables (ended in BarIdx) are in the coordinate of (2).
 *
 * Also note that to render separator between bars, sometimes the "index" in (2)
 * have 0.5 in fractions, but those values should only be used to convert to
 * rendered x coordinate (3) and doesn't corresponds to actual slice of audio
 * samples.
 */
function timestampToBarIndex(seconds: number): number {
  return Math.floor((seconds * SAMPLE_RATE) / SAMPLES_PER_SLICE);
}

function getBarX(barIdx: number): number {
  return barIdx * (BAR_WIDTH + BAR_GAP);
}

function xCoordinateToRoughIdx(x: number): number {
  return Math.floor(x / (BAR_WIDTH + BAR_GAP));
}

/**
 * Range of bars that should have the same speaker label.
 *
 * The range is [startBarIdx, endBarIdx), that is it includes startBarIdx but
 * excludes endBarIdx.
 *
 * Note that since each "bar" corresponds to several samples, and the speaker
 * label can be different during those samples, so we define each "bar" to have
 * speaker label as the speaker label at the end of the time range that the bar
 * corresponds to.
 */
interface SpeakerLabelRange {
  startBarIdx: number;
  endBarIdx: number;
  speakerLabelIndex: number;
}

interface SpeakerLabelInfo {
  speakerLabels: string[];

  /**
   * The ranges of speaker labels.
   *
   * The ranges are sorted and non-overlapping, and there could be gaps (part of
   * audio without speaker label) between ranges.
   */
  ranges: SpeakerLabelRange[];
}

/**
 * Component for showing audio waveform.
 */
export class AudioWaveform extends ReactiveLitElement {
  static override styles = [
    SPEAKER_LABEL_COLORS,
    css`
      :host {
        display: block;
        position: relative;
      }

      #chart {
        inset: 0;
        position: absolute;
      }

      .speaker-single {
        & .range {
          display: none;
        }
      }

      .speaker-duo,
      .speaker-multiple {
        & .no-speaker {
          --speaker-label-shapes-color: var(--cros-sys-primary_container);
        }
      }

      .speaker-range-start {
        /* The dash and space looks equal length with rounded linecap. */
        stroke-dasharray: 2, 6;
        stroke-linecap: round;
        stroke-width: 2;
        stroke: var(--speaker-label-shapes-color);

        .speaker-single & {
          display: none;
        }

        &.future {
          opacity: var(--cros-disabled-opacity);
        }

        .range:hover & {
          stroke-dasharray: none;
        }
      }

      .bar {
        /* Don't block hover on the background. */
        pointer-events: none;

        .speaker-single & {
          fill: var(--cros-sys-primary);

          &.future {
            fill: var(--cros-sys-primary_container);
          }
        }

        :is(.speaker-duo, .speaker-multiple) & {
          fill: var(--speaker-label-shapes-color);

          &.future {
            opacity: var(--cros-disabled-opacity);
          }
        }
      }

      .background {
        /* fill: none prevents :hover state, so we set opacity: 0 instead. */
        opacity: 0;
        fill: var(--speaker-label-container-color);

        .range:hover & {
          opacity: 1;

          &.future {
            opacity: var(--cros-disabled-opacity);
          }
        }
      }

      .speaker-label {
        align-items: center;
        background: var(--speaker-label-shapes-color);
        border-radius: 10px 10px 10px 0;
        bottom: 0;
        box-sizing: border-box;
        color: var(--speaker-label-label-color);
        display: flex;
        font: var(--cros-label-1-font);
        height: 20px;
        justify-content: center;
        left: 0;
        min-width: 20px;
        padding: 4px;
        position: absolute;
        width: fit-content;

        &.outside {
          display: none;
        }

        & > .full {
          display: none;
        }

        .range:hover & {
          display: block;

          /* TODO: b/336963138 - Animation on hover? */
          height: 26px;
          padding: 8px;

          & > .full {
            display: inline;
          }

          & > .short {
            display: none;
          }
        }
      }

      .playhead {
        fill: var(--cros-sys-on_surface_variant);

        /* Don't block hover on the background. */
        pointer-events: none;
      }
    `,
  ];

  static override properties: PropertyDeclarations = {
    values: {attribute: false},
    size: {state: true},
    currentTime: {type: Number},
    transcription: {attribute: false},
  };

  // Values to be shown as bars. Should be in range [0, POWER_SCALE_FACTOR - 1].
  values = new InteriorMutableArray<number>([]);

  currentTime: number|null = null;

  private readonly currentTimeSignal = this.propSignal('currentTime');

  private readonly currentTimeBarIdx = computed(() => {
    if (this.currentTimeSignal.value === null) {
      return null;
    }
    return timestampToBarIndex(this.currentTimeSignal.value);
  });

  private size: DOMRect|null = null;

  transcription: Transcription|null = null;

  private readonly transcriptionSignal = this.propSignal('transcription');

  private readonly speakerLabelInfo = computed((): SpeakerLabelInfo => {
    const transcription = this.transcriptionSignal.value;
    if (transcription === null) {
      return {
        speakerLabels: [],
        ranges: [],
      };
    }
    const paragraphs = transcription.getParagraphs();
    const speakerLabels = transcription.getSpeakerLabels();
    const ranges: SpeakerLabelRange[] = [];
    for (const paragraph of paragraphs) {
      const firstPart = assertExists(paragraph[0]);
      const lastPart = assertExists(paragraph.at(-1));

      const speakerLabel = firstPart.speakerLabel;
      if (speakerLabel === null) {
        // The paragraph doesn't have speaker label.
        continue;
      }
      const speakerLabelIndex = speakerLabels.indexOf(speakerLabel);
      assert(speakerLabelIndex !== -1);

      const startMs = firstPart.timeRange?.startMs ?? null;
      const endMs = lastPart.timeRange?.endMs ?? null;
      if (startMs === null || endMs === null) {
        // TODO(pihsun): Check if there's any possibility that the timestamp is
        // missing.
        continue;
      }
      // The timestamps should be increasing.
      assert(startMs <= endMs);

      const startBarIdx = timestampToBarIndex(startMs / 1000);
      const endBarIdx = timestampToBarIndex(endMs / 1000);
      assert(
        ranges.length === 0 ||
          assertExists(ranges.at(-1)).endBarIdx <= startBarIdx,
      );
      // These can be equal if there's a very short paragraph with speaker
      // label.
      if (startBarIdx !== endBarIdx) {
        ranges.push({
          speakerLabelIndex,
          startBarIdx,
          endBarIdx,
        });
      }
    }
    return {
      speakerLabels,
      ranges,
    };
  });

  private readonly resizeObserver = new ResizeObserver(() => {
    this.size = this.getBoundingClientRect();
  });

  get chart(): SVGElement {
    return assertInstanceof(
      assertExists(this.shadowRoot).querySelector('#chart'),
      SVGElement,
    );
  }

  // TODO(pihsun): Check if we can use ResizeObserver in @lit-labs/observers.
  override connectedCallback(): void {
    super.connectedCallback();
    this.resizeObserver.observe(this);
  }

  override disconnectedCallback(): void {
    super.disconnectedCallback();
    this.resizeObserver.disconnect();
  }

  private getBarLocation(
    idx: number,
    val: number,
    minHeight: number,
    maxHeight: number,
  ): Rect {
    const width = BAR_WIDTH;
    const height =
      minHeight + (maxHeight - minHeight) * (val / (POWER_SCALE_FACTOR - 1));
    const x = getBarX(idx) - width / 2;
    const y = -height / 2;

    return {x, y, width, height};
  }

  private isAfterCurrentTime(idx: number) {
    return (
      this.currentTimeBarIdx.value !== null &&
      idx >= this.currentTimeBarIdx.value
    );
  }

  private renderSpeakerRangeStart({
    startBarIdx,
    speakerLabelIndex,
  }: SpeakerLabelRange) {
    const startX = getBarX(startBarIdx - 0.5);

    const classes = {
      [getSpeakerLabelClass(speakerLabelIndex)]: true,
      future: this.isAfterCurrentTime(startBarIdx),
    };
    const height = SPEAKER_LABEL_LINE_HEIGHT;
    // clang-format off
    return svg`<line
      x1=${startX}
      x2=${startX}
      y1=${-height / 2}
      y2=${height / 2}
      class="speaker-range-start ${classMap(classes)} "
    />`;
    // clang-format on
  }

  private renderSpeakerRangeLabel(
    speakerLabels: string[],
    {startBarIdx, speakerLabelIndex}: SpeakerLabelRange,
    viewBox: Rect,
  ) {
    // minus one so it aligns with the left edge of the speaker label range
    // start.
    const startX = getBarX(startBarIdx - 0.5) - 1;

    const classes = {
      [getSpeakerLabelClass(speakerLabelIndex)]: true,
      outside: startX < viewBox.x,
    };

    const maxHeight = 26;
    // Always render the label in view. It'll be hidden until hover if it's
    // originally outside of the view. Note that only the label that has some
    // corresponding bar inside view will be rendered (see renderSvgContent).
    const x = Math.max(startX, viewBox.x);
    const y = -SPEAKER_LABEL_LINE_HEIGHT / 2 - maxHeight;
    const shortLabel = assertExists(speakerLabels[speakerLabelIndex]);
    const fullLabel = i18n.transcriptionSpeakerLabelLabel(shortLabel);

    // clang-format off
    // The width/height on foreignObject is necessary for the div to be shown,
    // but the actual label size can be smaller than that.
    // TODO(pihsun): This introduce a bit more hover space than the visible
    // labels. Check if there's a better way to do this.
    return svg`<foreignObject
      x=${x}
      y=${y}
      width="100"
      height=${maxHeight}
    >
      <div class="speaker-label ${classMap(classes)}">
        <span class="short">${shortLabel}</span>
        <span class="full">${fullLabel}</span>
      </div>
    </foreignObject>`;
    // clang-format on
  }

  /**
   * Returns the background path with the top-right and bottom-right corner
   * rounded.
   */
  private getBackgroundPath(startX: number, endX: number) {
    const height = SPEAKER_LABEL_LINE_HEIGHT;
    const radius = 12;
    // clang-format off
    return `
      M ${startX} ${-height / 2}
      v ${height}
      H ${endX - radius}
      a ${radius} ${radius} 0 0 0 ${radius} ${-radius}
      V ${-height / 2 + radius}
      a ${radius} ${radius} 0 0 0 ${-radius} ${-radius}
      H ${startX}
    `;
    // clang-format on
  }

  private renderSpeakerRangeBackground({
    startBarIdx,
    endBarIdx,
    speakerLabelIndex,
  }: SpeakerLabelRange) {
    const startX = getBarX(startBarIdx - 0.5);
    const endX = getBarX(endBarIdx - 0.5);

    const classes: Record<string, boolean> = {
      [getSpeakerLabelClass(speakerLabelIndex)]: true,
    };

    const currentTimeIdx = this.currentTimeBarIdx.value;
    if (currentTimeIdx !== null && startBarIdx <= currentTimeIdx &&
        currentTimeIdx < endBarIdx) {
      // Part of the background are before and part are after. Need to cut the
      // background in half.
      const centerX = getBarX(currentTimeIdx) - BAR_WIDTH / 2;
      const height = SPEAKER_LABEL_LINE_HEIGHT;
      const y = -height / 2;
      return [
        svg`<rect
          x=${startX}
          y=${y}
          width=${centerX - startX}
          height=${height}
          class="background ${classMap(classes)}"
        />`,
        svg`<path
          d=${this.getBackgroundPath(centerX, endX)}
          class="background future ${classMap(classes)}"
        />`,
      ];
    } else {
      classes['future'] = this.isAfterCurrentTime(startBarIdx);
      return svg`<path
        d=${this.getBackgroundPath(startX, endX)}
        class="background ${classMap(classes)}"
      />`;
    }
  }

  private renderSpeakerRange(
    speakerLabels: string[],
    range: SpeakerLabelRange,
    viewBox: Rect,
  ) {
    return svg`<g class="range">
      ${this.renderSpeakerRangeBackground(range)}
      ${this.renderSpeakerRangeStart(range)}
      ${this.renderSpeakerRangeLabel(speakerLabels, range, viewBox)}
    </g>`;
  }

  private renderCurrentTimeBar(viewBox: Rect) {
    if (this.currentTimeBarIdx.value === null) {
      return nothing;
    }
    const width = 2;
    // Add the progress indicator at the current time. Draw on the left side
    // of the current bar so it looks more "correct" when jumping to the start
    // of a paragraph with speaker label.
    const x = getBarX(this.currentTimeBarIdx.value) - BAR_WIDTH / 2 - width;
    const y = viewBox.y;
    return svg`<rect
      x=${x}
      y=${y}
      width=${width}
      height=${viewBox.height}
      rx="1"
      class="playhead"
    />`;
  }

  private renderAudioBars(viewBox: Rect) {
    if (this.values.length === 0) {
      return nothing;
    }

    const speakerLabelRanges = this.speakerLabelInfo.value.ranges;
    let currentSpeakerLabelRangeIdx = 0;
    let currentSpeakerLabelRangeRendered = false;

    /**
     * Gets the speaker label index of a bar index.
     *
     * The `barIdx` given to this function needs to be increasing across
     * multiple calls, since this is implemented by scanning through the
     * speakerLabelRanges.
     */
    function getSpeakerLabelRange(barIdx: number): SpeakerLabelRange|null {
      while (currentSpeakerLabelRangeIdx < speakerLabelRanges.length) {
        const range = assertExists(
          speakerLabelRanges[currentSpeakerLabelRangeIdx],
        );
        if (barIdx < range.startBarIdx) {
          return null;
        }
        if (barIdx < range.endBarIdx) {
          return range;
        }
        currentSpeakerLabelRangeIdx += 1;
        currentSpeakerLabelRangeRendered = false;
      }
      return null;
    }

    // This is an optimization to not goes through the whole values array, and
    // directly calculate the part that needs to be rendered instead. To
    // simplify the logic we calculate the rough range and just extend it a bit
    // to make sure we covers the whole range.
    const startIdx = Math.max(xCoordinateToRoughIdx(viewBox.x) - 5, 0);
    const endIdx = Math.min(
      xCoordinateToRoughIdx(viewBox.x + viewBox.width) + 5,
      this.values.length - 1,
    );

    if (endIdx < startIdx) {
      return nothing;
    }

    const idxRange = Array.from(
      {length: endIdx - startIdx + 1},
      (_, i) => i + startIdx,
    );

    return repeat(
      idxRange,
      (i) => i,
      (i) => {
        const ret: RenderResult[] = [];

        const val = assertExists(this.values.array[i]);
        const rect = this.getBarLocation(
          i,
          val,
          BAR_MIN_HEIGHT,
          Math.min(viewBox.height, BAR_MAX_HEIGHT),
        );
        if (rect.x + rect.width < viewBox.x ||
            rect.x > viewBox.x + viewBox.width) {
          return nothing;
        }
        const classes: Record<string, boolean> = {
          future: this.isAfterCurrentTime(i),
        };
        const range = getSpeakerLabelRange(i);

        if (range !== null) {
          if (!currentSpeakerLabelRangeRendered) {
            ret.push(
              this.renderSpeakerRange(
                this.speakerLabelInfo.value.speakerLabels,
                range,
                viewBox,
              ),
            );
            currentSpeakerLabelRangeRendered = true;
          }

          classes[getSpeakerLabelClass(range.speakerLabelIndex)] = true;
        } else {
          classes['no-speaker'] = true;
        }
        ret.push(svg`<rect
          x=${rect.x}
          y=${rect.y}
          width=${rect.width}
          height=${rect.height}
          rx=${rect.width / 2}
          class="bar ${classMap(classes)}"
        />`);
        return ret;
      },
    );
  }

  private renderSvgContent(viewBox: Rect|null) {
    if (viewBox === null) {
      return nothing;
    }
    return [this.renderAudioBars(viewBox), this.renderCurrentTimeBar(viewBox)];
  }

  private getViewBox(): Rect|null {
    if (this.size === null) {
      return null;
    }
    const {width, height} = this.size;
    const x = (() => {
      if (this.currentTimeBarIdx.value !== null) {
        const x = getBarX(this.currentTimeBarIdx.value);
        // Put the current time in the center.
        // TODO(pihsun): Should this be controlled by a separate property?
        // TODO(pihsun): Should we use the real time offset, instead of
        // aligning to the bar?
        return x - width / 2;
      } else {
        return this.values.length * (BAR_WIDTH + BAR_GAP) - width;
      }
    })();
    const y = -height / 2;
    return {x, y, width, height};
  }

  override render(): RenderResult {
    if (this.size === null) {
      return nothing;
    }

    const numSpeakerClass = getNumSpeakerClass(
      this.speakerLabelInfo.value.speakerLabels.length,
    );

    const viewBox = this.getViewBox();

    // TODO(pihsun): Performance doesn't seem to be ideal for rendering this
    // with svg. Measure it for longer recording and see if there's other way to
    // do it. (Draw on canvas directly?)
    return html`<svg
      id="chart"
      viewBox=${toViewBoxString(viewBox)}
      class=${numSpeakerClass}
    >
      ${this.renderSvgContent(viewBox)}
    </svg>`;
  }
}

window.customElements.define('audio-waveform', AudioWaveform);

declare global {
  interface HTMLElementTagNameMap {
    'audio-waveform': AudioWaveform;
  }
}