// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'chrome://resources/mwc/@material/web/focus/md-focus-ring.js';
import 'chrome://resources/cros_components/button/button.js';
import {
classMap,
createRef,
css,
CSSResultGroup,
html,
ifDefined,
nothing,
PropertyDeclarations,
PropertyValues,
ref,
Ref,
repeat,
} from 'chrome://resources/mwc/lit/index.js';
import {i18n} from '../core/i18n.js';
import {ReactiveLitElement} from '../core/reactive/lit.js';
import {signal} from '../core/reactive/signal.js';
import {TextPart, Transcription} from '../core/soda/soda.js';
import {
assert,
assertExists,
assertInstanceof,
} from '../core/utils/assert.js';
import {formatDuration} from '../core/utils/datetime.js';
import {clamp, parseNumber, sliceWhen} from '../core/utils/utils.js';
import {
getNumSpeakerClass,
getSpeakerLabelClass,
SPEAKER_LABEL_COLORS,
} from './styles/speaker_label.js';
/** Tolerance, in pixels, used when comparing scroll positions. */
const SCROLL_MARGIN = 3;

/**
 * Checks whether `x` lies inside the closed range spanned by `low` and `high`,
 * widened by `SCROLL_MARGIN` on both ends.
 *
 * The two bounds may be given in either order. The margin exists because
 * `.scrollTo` sometimes ends up slightly off from the position it was asked
 * to scroll to.
 */
function inBetween(x: number, [low, high]: [number, number]): boolean {
  const lowerBound = Math.min(low, high) - SCROLL_MARGIN;
  const upperBound = Math.max(low, high) + SCROLL_MARGIN;
  return lowerBound <= x && x <= upperBound;
}
export class TranscriptionView extends ReactiveLitElement {
// Component styles: the shared speaker label colors, followed by the rules
// for the scroll container (#container), the two-column transcript grid
// (#transcript / .row), timestamps, paragraphs, sentences, the highlighted
// word and the autoscroll button.
//
// The `.seekable` and `.autoscroll` classes referenced by the selectors below
// are toggled on #container in render(). `.speaker-single` is presumably one
// of the classes returned by getNumSpeakerClass() - TODO confirm.
static override styles: CSSResultGroup = [
SPEAKER_LABEL_COLORS,
css`
:host {
display: block;
position: relative;
}
#container {
box-sizing: border-box;
display: flex;
flex-flow: column;
gap: 12px;
max-height: 100%;
overflow-y: auto;
padding: 12px 0 64px;
width: 100%;
}
#transcript {
display: grid;
grid-template-columns:
minmax(calc(12px + 40px + 10px), max-content)
1fr;
}
.row {
display: grid;
grid-column: 1 / 3;
grid-template-columns: subgrid;
padding: 0 12px 0 0;
}
.timestamp {
/*
* Note that this need to be 0px instead of 0, since it's used in
* calc().
*/
--md-focus-ring-outward-offset: 0px;
--md-focus-ring-shape: 4px;
font: var(--cros-body-1-font);
/*
* Note that compared to the spec, 2px of left/right margin is moved to
* padding so it's included in the hover / focus ring.
*/
margin: 12px 8px 12px 10px;
outline: none;
padding: 0 2px;
place-self: start;
position: relative;
.seekable & {
cursor: pointer;
}
}
.paragraph {
font: var(--cros-body-1-font);
padding: 12px;
}
.highlight-word {
text-decoration: underline 1.5px;
text-underline-offset: 3px;
}
.speaker-label {
color: var(--speaker-label-shapes-color);
font: var(--cros-button-1-font);
margin: 0 0 4px;
.speaker-single & {
display: none;
}
}
.speaker-pending {
--speaker-label-shapes-color: var(--cros-sys-on_surface_variant);
}
.sentence {
border-radius: 4px;
box-decoration-break: clone;
-webkit-box-decoration-break: clone;
/* "Undo" the horizontal padding so the text aligns with the design. */
margin: 0 -2px;
/*
* Note that while the font size is 13px, the background height without
* padding would be 16px. Make it full line height (20px) by adding a
* 2px vertical padding. (horizontal padding happens to also be 2px).
*/
padding: 2px;
.seekable & {
cursor: pointer;
&:hover {
background: var(--cros-sys-highlight_shape);
}
}
.seekable .timestamp:hover + .paragraph > &:first-of-type {
background: var(--cros-sys-highlight_shape);
}
}
#autoscroll-button {
bottom: 16px;
left: 0;
margin: 0 auto;
position: absolute;
right: 0;
/* TODO(pihsun): Transition between shown/hide state */
#container.autoscroll + & {
display: none;
}
}
`,
];
// Lit reactive property declarations. `transcription` is property-only (no
// attribute).
static override properties: PropertyDeclarations = {
transcription: {attribute: false},
currentTime: {type: Number},
seekable: {type: Boolean},
};
// The transcription to show. Nothing is rendered while this is null.
transcription: Transcription|null = null;
// Current position used to pick the highlighted word. It is compared against
// `timeRange.startMs / 1000` and `timeRange.endMs / 1000`, i.e. it is in
// seconds. No word is highlighted when this is null.
currentTime: number|null = null;
// When true, sentences and timestamps are clickable (dispatching
// 'word-clicked') and autoscroll follows the highlighted word. When false,
// autoscroll goes to the bottom of the container.
seekable = false;
// Whether autoscroll is currently active. Turned off by onContainerScroll()
// and turned back on by clicking on text or on the autoscroll button.
autoscrollEnabled = signal(true);
// [scrollTop when the last autoscroll started, its target scrollTop], or null
// when no autoscroll range is being tracked.
lastAutoScrollRange: [number, number]|null = null;
// Date.now() of the last autoscroll that called scrollTo, used for
// throttling in runAutoScroll().
lastAutoScrollTime: number|null = null;
// Reference to the #container scroll element, bound in render().
containerRef: Ref<HTMLElement> = createRef();
// TODO(pihsun): Move all the autoscroll logic to a separate file /
// ReactiveController.
//
// Autoscroll that is automatically stopped by user scroll is VERY hard
// to get 100% correct, since there's no way to distinguish scroll events
// that are originated from user input or autoscroll, especially when
// we want smooth scroll which generates multiple scroll events.
//
// Some other issues that can make things complicated:
// * Autoscroll can also be interrupted by user scroll, which doesn't
// generate a separate scrollend event.
// * There's no good way of knowing if calling a .scrollTo will generate any
// scroll/scrollend event.
//
// Desired behavior of the heuristic:
// * When there's no user input and either in recording or in playback, auto
// scroll must not stop by itself.
// * User scroll should stop the autoscroll, but it's fine if occasionally
// "small" scroll that occurs at the same time of autoscroll got ignored.
// * It's fine that other layout change originated from user (changing window
// width, ...) stops autoscroll.
// * Clicking the autoscroll button should start autoscroll.
//
// The current "simple" heuristic:
// * On each autoscroll, we remember the current scrollTop and the target
// scrollTop as the possible scroll event range due to autoscroll.
// * On each scroll event:
// * If the container is at bottom of scroll, don't stop autoscroll, since
// the scrollTop might jump back due to the scrollHeight change.
// * If there's no current autoscroll range, or the scrollTop falls outside
// of the autoscroll range, stop autoscroll.
//
/**
 * Handles `scroll` events on the container and turns autoscroll off when the
 * scroll position cannot be explained by the most recent autoscroll.
 */
private onContainerScroll() {
  if (!this.autoscrollEnabled.value) {
    // Autoscroll is already off, so there is nothing left to decide.
    return;
  }

  const {scrollTop, scrollHeight, offsetHeight} =
      assertExists(this.containerRef.value);

  // While transcription is running, intermediate partial results can make the
  // transcription shorter, which brings scrollTop back. As a workaround, a
  // scroll position near the bottom never stops autoscroll.
  const nearBottomThreshold = scrollHeight - offsetHeight - SCROLL_MARGIN;
  if (scrollTop >= nearBottomThreshold) {
    return;
  }

  const autoScrollRange = this.lastAutoScrollRange;
  const withinAutoScrollRange =
      autoScrollRange !== null && inBetween(scrollTop, autoScrollRange);
  if (withinAutoScrollRange) {
    return;
  }

  // No autoscroll accounts for this position; treat it as a user scroll.
  this.autoscrollEnabled.value = false;
  this.lastAutoScrollRange = null;
}
// Handles `scrollend` on the container: forgets the range recorded by the
// last autoscroll, so later scroll events are no longer matched against it.
private onContainerScrollEnd() {
this.lastAutoScrollRange = null;
}
/**
 * Lit lifecycle hook that runs an autoscroll after an update, as long as
 * autoscroll is enabled.
 *
 * @param changedProperties The properties that changed in this update.
 */
override updated(changedProperties: PropertyValues<this>): void {
  if (!this.autoscrollEnabled.value) {
    return;
  }
  if (this.seekable) {
    const hasNewCurrentTime =
        changedProperties.has('currentTime') && this.currentTime !== null;
    if (!hasNewCurrentTime) {
      // Optimization: there is no need to scroll to the highlighted word
      // again when currentTime did not change or is null.
      return;
    }
  }
  this.runAutoScroll();
}
/**
 * Smooth-scrolls the container to the autoscroll target, at most once every
 * 500ms.
 *
 * In seekable mode the target vertically centers the highlighted word, and
 * nothing happens when no word is highlighted. Otherwise the target is the
 * bottom of the container. The start and target positions are recorded in
 * `lastAutoScrollRange` so that onContainerScroll() can recognize the
 * resulting scroll events.
 */
private runAutoScroll() {
  const now = Date.now();
  // TODO(pihsun): Ideally we want to skip scrolling while an existing scroll
  // is still ongoing, but that can't be detected reliably since Chrome
  // sometimes delivers scroll events without a corresponding scrollend.
  // Throttle `scrollTo` to at most once every 500ms instead, so the smooth
  // scrolling can make some progress every time.
  const previousTime = this.lastAutoScrollTime;
  if (previousTime !== null && previousTime >= now - 500) {
    // An autoscroll already happened within the last 500ms.
    return;
  }

  const container = assertExists(this.containerRef.value);
  // Scroll position of the bottom; also the upper bound for any target.
  const bottomScrollTop = container.scrollHeight - container.offsetHeight;

  let targetScrollTop = bottomScrollTop;
  if (this.seekable) {
    // TODO(pihsun): We might need "fake" highlight blocks between speech so
    // it'll scroll to the part between speech?
    const highlighted =
        this.shadowRoot?.querySelector('.highlight-word') ?? null;
    if (highlighted === null) {
      return;
    }
    // TODO(pihsun): Have a typed helper function for querySelector /
    // querySelectorAll with assertion for types.
    assert(highlighted instanceof HTMLElement);
    // The target is computed here rather than via Element.scrollIntoView, so
    // that the target scrollTop is known for the autoscroll range.
    const wordCenter = highlighted.offsetTop + highlighted.offsetHeight / 2;
    targetScrollTop =
        clamp(wordCenter - container.clientHeight / 2, 0, bottomScrollTop);
  }

  // TODO(pihsun): scrollTo does nothing & doesn't fire scrollend when the
  // target top is almost the same as the current scrollTop. Check Chrome
  // code to see what the real condition is.
  const startScrollTop = container.scrollTop;
  if (Math.abs(startScrollTop - targetScrollTop) >= SCROLL_MARGIN) {
    this.lastAutoScrollRange = [startScrollTop, targetScrollTop];
    this.lastAutoScrollTime = now;
    container.scrollTo({top: targetScrollTop, behavior: 'smooth'});
  }
}
/**
 * Renders the words of one sentence, wrapping the word whose time range
 * contains `currentTime` in a `.highlight-word` span.
 *
 * @param sentence The parts that make up the sentence.
 */
private renderSentence(sentence: TextPart[]) {
  // A part is highlighted when currentTime (seconds) falls inside its time
  // range (milliseconds), start inclusive and end exclusive.
  const isHighlighted = (part: TextPart): boolean => {
    if (this.currentTime === null || part.timeRange === null) {
      return false;
    }
    const startSec = part.timeRange.startMs / 1000;
    const endSec = part.timeRange.endMs / 1000;
    return this.currentTime >= startSec && this.currentTime < endSec;
  };

  const renderPart = (part: TextPart, index: number) => {
    // The space before the first word is added at the sentence level. Later
    // words follow their own leadingSpace, and a missing field counts as
    // having a space.
    const needsSpace = index > 0 && (part.leadingSpace ?? true);
    const prefix = needsSpace ? ' ' : '';
    if (isHighlighted(part)) {
      return html`${prefix}<span class="highlight-word"
        >${part.text}</span
      >`;
    }
    return `${prefix}${part.text}`;
  };

  return repeat(sentence, (_part, index) => index, renderPart);
}
/**
 * Renders the speaker label shown above a paragraph.
 *
 * @param speakerLabels All speaker labels of the transcription; the index of
 *     `speakerLabel` in this list selects the label's CSS class.
 * @param speakerLabel Speaker label of the paragraph, or null for none.
 * @param partial Whether the paragraph is partial, in which case the
 *     "pending" class and text are used instead.
 * @return The label template, or `nothing` when there is no speaker label.
 */
private renderSpeakerLabel(
  speakerLabels: string[],
  speakerLabel: string|null,
  partial: boolean,
) {
  if (speakerLabel === null) {
    return nothing;
  }

  let cssClass: string;
  let displayText: string;
  if (!partial) {
    const labelIndex = speakerLabels.indexOf(speakerLabel);
    assert(labelIndex !== -1);
    cssClass = getSpeakerLabelClass(labelIndex);
    displayText = i18n.transcriptionSpeakerLabelLabel(speakerLabel);
  } else {
    cssClass = 'speaker-pending';
    displayText = i18n.transcriptionSpeakerLabelPendingLabel;
  }

  return html`<div class="speaker-label ${cssClass}">
    ${displayText}
  </div>`;
}
/**
 * Renders the text of one paragraph.
 *
 * When not seekable the result is a single plain string. When seekable, the
 * parts are cut into sentences and each sentence becomes a `.sentence` span
 * that carries the start time of its first part in `data-start-ms`.
 *
 * @param parts The parts that make up the paragraph.
 */
private renderParagraphContent(parts: TextPart[]) {
  if (!this.seekable) {
    // Without seeking there is no need for per-sentence / per-word DOM nodes,
    // so everything is emitted as one string to keep the node count low.
    let plainText = '';
    for (const [index, part] of parts.entries()) {
      const needsSpace = part.leadingSpace ?? index > 0;
      plainText += `${needsSpace ? ' ' : ''}${part.text}`;
    }
    return plainText;
  }

  // TODO: b/341014241 - Better heuristic for cutting sentences.
  const sentenceEndings = ['.', '?', '!'];
  const sentences = sliceWhen(parts, ({text}) => {
    return sentenceEndings.some((ending) => text.endsWith(ending));
  });

  const renderSentenceSpan = (sentence: TextPart[], index: number) => {
    // The first word's leadingSpace field decides the space before the
    // sentence. When the field is missing, every sentence except the first
    // one gets a space.
    const firstPart = sentence[0];
    const needsSpace = firstPart?.leadingSpace ?? index > 0;
    return html`${needsSpace ? ' ' : ''}<span
      class="sentence"
      data-start-ms=${ifDefined(firstPart?.timeRange?.startMs)}
      >${this.renderSentence(sentence)}</span
    >`;
  };

  return repeat(sentences, (_sentence, index) => index, renderSentenceSpan);
}
/**
 * Renders a paragraph as its speaker label followed by its text content.
 *
 * The speaker label and partial state are taken from the first part, so
 * `parts` must not be empty.
 *
 * @param speakerLabels All speaker labels of the transcription.
 * @param parts The parts that make up the paragraph.
 */
private renderParagraph(speakerLabels: string[], parts: TextPart[]) {
  const firstPart = assertExists(parts[0]);
  const label = this.renderSpeakerLabel(
    speakerLabels,
    firstPart.speakerLabel,
    firstPart.partial ?? false,
  );
  const content = this.renderParagraphContent(parts);
  return [label, content];
}
/**
 * Handles clicks inside the transcript. When the click lands on, or inside,
 * an element carrying `data-start-ms`, dispatches a 'word-clicked' event with
 * that start time and turns autoscroll back on.
 *
 * @param ev The click event.
 */
private onTextClick(ev: MouseEvent) {
  const clicked = assertInstanceof(ev.target, HTMLElement);
  const seekTarget = clicked.closest('[data-start-ms]');
  if (seekTarget === null) {
    return;
  }

  const rawStartMs =
      assertInstanceof(seekTarget, HTMLElement).dataset['startMs'];
  const startMs = parseNumber(rawStartMs);
  if (startMs !== null) {
    const event = new CustomEvent('word-clicked', {detail: {startMs}});
    this.dispatchEvent(event);
    this.autoscrollEnabled.value = true;
  }
}
// Click handler of the autoscroll button: turns autoscroll back on and
// immediately attempts a scroll. Note that runAutoScroll() is throttled, so
// the immediate scroll is skipped if one happened within the last 500ms.
private onAutoScrollButtonClick() {
this.autoscrollEnabled.value = true;
this.runAutoScroll();
}
/**
 * Renders the scrollable transcript, one row (timestamp + paragraph) per
 * paragraph, followed by the autoscroll button. Renders nothing while there
 * is no transcription.
 */
override render(): RenderResult {
  const transcription = this.transcription;
  if (transcription === null) {
    return nothing;
  }
  const speakerLabels = transcription.getSpeakerLabels();
  const paragraphs = transcription.getParagraphs();

  // Renders a single row of the transcript grid.
  const renderRow = (parts: TextPart[]) => {
    const firstRange = assertExists(parts[0]).timeRange;
    let timestampText = '?';
    if (firstRange !== null) {
      timestampText = formatDuration({milliseconds: firstRange.startMs});
    }
    // TODO(pihsun): Check if there's any case that timestamp will be
    // missing.
    // TODO(pihsun): Handle keyboard event / a11y on the timestamp.
    // TODO(pihsun): Check performance? Try to do CSS only highlight when
    // only currentTime is changed, so the whole template doesn't need to
    // be re-computed.
    return html`
      <div class="row">
        <span
          class="timestamp"
          tabindex=${this.seekable ? 0 : -1}
          data-start-ms=${ifDefined(firstRange?.startMs)}
        >
          ${timestampText}
          ${this.seekable ? html`<md-focus-ring></md-focus-ring>` : nothing}
        </span>
        <div class="paragraph">
          ${this.renderParagraph(speakerLabels, parts)}
        </div>
      </div>
    `;
  };
  const rows = repeat(paragraphs, (_parts, index) => index, renderRow);

  const containerClasses = {
    seekable: this.seekable,
    autoscroll: this.autoscrollEnabled.value,
    [getNumSpeakerClass(speakerLabels.length)]: true,
  };

  // TODO(pihsun): @click on #transcript is a performance optimization to
  // only have the click handler on the container. Need to adjust this
  // accordingly when we have other clickable things inside the container
  // (speaker label).
  return html`<div
      id="container"
      class=${classMap(containerClasses)}
      ${ref(this.containerRef)}
      @scroll=${this.onContainerScroll}
      @scrollend=${this.onContainerScrollEnd}
    >
      <slot></slot>
      <div
        id="transcript"
        @click=${this.seekable ? this.onTextClick : nothing}
      >
        ${rows}
      </div>
    </div>
    <cros-button
      button-style="secondary"
      id="autoscroll-button"
      label=${i18n.transcriptionAutoscrollButton}
      @click=${this.onAutoScrollButtonClick}
    ></cros-button>`;
}
}
// Registers the class as the `<transcription-view>` custom element.
window.customElements.define('transcription-view', TranscriptionView);
// Adds the tag to the global tag-name map, so that typed DOM APIs (e.g.
// `document.createElement('transcription-view')`) resolve to
// `TranscriptionView`.
declare global {
interface HTMLElementTagNameMap {
'transcription-view': TranscriptionView;
}
}