// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import {AutomationPredicate} from '/common/automation_predicate.js';
import {AutomationUtil} from '/common/automation_util.js';
import {constants} from '/common/constants.js';
import {FlagName, Flags} from '/common/flags.js';
import {NodeNavigationUtils} from '/common/node_navigation_utils.js';
import {NodeUtils} from '/common/node_utils.js';
import {ParagraphUtils} from '/common/paragraph_utils.js';
import {TestImportManager} from '/common/testing/test_import_manager.js';
import {WordUtils} from '/common/word_utils.js';
import {InputHandler} from './input_handler.js';
import {MetricsUtils} from './metrics_utils.js';
import {PrefsManager} from './prefs_manager.js';
import {SelectToSpeakConstants} from './select_to_speak_constants.js';
import {TtsManager} from './tts_manager.js';
import {SelectToSpeakUiListener, UiManager} from './ui_manager.js';
type AutomationNode = chrome.automation.AutomationNode;
import AutomationEvent = chrome.automation.AutomationEvent;
import EventType = chrome.automation.EventType;
import RoleType = chrome.automation.RoleType;
import SelectToSpeakState = chrome.accessibilityPrivate.SelectToSpeakState;
// Matches one of the known GSuite apps which need the clipboard to find and
// read selected text. Includes sandbox and non-sandbox versions.
// (Simplified: the previous nested non-capturing groups and redundant `{1}`
// quantifier were equivalent to a plain alternation.)
const GSUITE_APP_REGEXP =
    /^https:\/\/docs\.(?:sandbox\.)?google\.com\/(?:presentation|document|spreadsheets|drawings|scenes)\//;

/**
 * Determines if a node is in one of the known Google GSuite apps that needs
 * special case treatment for speaking selected text. Not all Google GSuite
 * pages are included, because some are not known to have a problem with
 * selection: Forms is not included since it's relatively similar to any HTML
 * page, for example.
 * @param node The node to check
 * @return The root node of the GSuite app, or null if none is found.
 */
export function getGSuiteAppRoot(node: AutomationNode|
                                 undefined): AutomationNode|null {
  // Walk up the chain of roots (e.g. through parent frames) looking for one
  // whose URL matches a known GSuite app.
  while (node !== undefined && node.root !== undefined) {
    // Use test() rather than exec(): only a boolean match is needed.
    if (node.root.url !== undefined && GSUITE_APP_REGEXP.test(node.root.url)) {
      return node.root;
    }
    node = node.root.parent;
  }
  return null;
}
/**
* Select-to-speak component extension controller.
*/
export class SelectToSpeak implements SelectToSpeakUiListener {
  // Start char index of the word to be spoken, relative to the text content
  // of the current node group. -1 when nothing is queued.
  private currentCharIndex_: number;
  // Index into |currentNodeGroups_| of the group currently being spoken.
  private currentNodeGroupIndex_: number;
  // TODO(b/314203187): In many places we've added a currentNodeGroupItem_!,
  // determine if this is correct or if a check should be added.
  // The node group item currently being spoken; a representation of the
  // original input node (see the constructor comment for details).
  private currentNodeGroupItem_: ParagraphUtils.NodeGroupItem|null;
  // Index of |currentNodeGroupItem_| within the current node group. In
  // Gsuite, items may exist outside of a node group (see constructor).
  private currentNodeGroupItemIndex_: number;
  // Node groups queued for speech; passed to TTS one group at a time.
  private currentNodeGroups_: ParagraphUtils.NodeGroup[];
  // Start/end indexes of the word being spoken within the current node group
  // item; only maintained when word highlighting is enabled.
  private currentNodeWord_: {start: number, end: number}|null;
  // The desktop automation node; assigned asynchronously in init_().
  private desktop_: AutomationNode|undefined;
  // Translates raw mouse/keyboard input; created in setUpEventListeners_().
  private inputHandler_: InputHandler|null;
  // setInterval handle, set whenever speech is in progress.
  private intervalId_: number|undefined;
  // Tone played when a speech request finds no selection.
  private nullSelectionTone_: HTMLAudioElement;
  // Test hook run after onStateChangeRequested() handles a state change.
  private onStateChangeRequestedCallbackForTest_: (() => void)|null;
  private prefsManager_: PrefsManager;
  // Whether to auto-scroll spoken nodes into view (until the user scrolls).
  private scrollToSpokenNode_: boolean;
  // Speech rate multiplier; set by onChangeSpeedRequested().
  private speechRateMultiplier_: number;
  // Current state of the Select-to-Speak extension.
  private state_: SelectToSpeakState;
  // Whether the current nodes support use of the navigation panel.
  private supportsNavigationPanel_: boolean;
  private ttsManager_: TtsManager;
  private uiManager_: UiManager;
  // Test hook run once the desktop node has loaded in init_().
  // NOTE(review): not alphabetical despite the ordering note below — confirm
  // intended placement.
  private onLoadDesktopCallbackForTest_: (() => void)|null;
  /** Please keep fields in alphabetical order. */
  constructor() {
    /**
     * The start char index of the word to be spoken. The index is relative
     * to the text content of the current node group.
     */
    this.currentCharIndex_ = -1;
    /**
     * The index for the node group currently being spoken in
     * |this.currentNodeGroups_|.
     */
    this.currentNodeGroupIndex_ = -1;
    /**
     * The node group item currently being spoken. A node group item is a
     * representation of the original input nodes, but may not be the same.
     * For example, an input inline text node will be represented by its
     * static text node in the node group item.
     */
    this.currentNodeGroupItem_ = null;
    /**
     * The index for the current node group item within the current node
     * group. The current node group can be accessed from
     * |this.currentNodeGroups_| using |this.currentNodeGroupIndex_|. In most
     * cases, |this.currentNodeGroupItemIndex_| can be used to get
     * |this.currentNodeGroupItem_| from the current node group. However, in
     * Gsuite, we will have node group items outside of a node group.
     */
    this.currentNodeGroupItemIndex_ = -1;
    /**
     * The node groups to be spoken. We process content into node groups and
     * pass one node group at a time to the TTS engine. Note that we do not
     * use node groups for user-selected text in Gsuite. See more details in
     * readNodesBetweenPositions_.
     */
    this.currentNodeGroups_ = [];
    /**
     * The indexes within the current node group item representing the word
     * currently being spoken. Only updated if word highlighting is enabled.
     */
    this.currentNodeWord_ = null;
    // Assigned asynchronously in init_() via chrome.automation.getDesktop.
    this.desktop_;
    this.inputHandler_ = null;
    /**
     * The interval ID from a call to setInterval, which is set whenever
     * speech is in progress.
     */
    this.intervalId_;
    // Earcon played when a speech request finds nothing selected.
    this.nullSelectionTone_ = new Audio('earcons/null_selection.ogg');
    /**
     * Function to be called when a state change request is received from the
     * accessibilityPrivate API.
     */
    this.onStateChangeRequestedCallbackForTest_ = null;
    this.prefsManager_ = new PrefsManager();
    this.scrollToSpokenNode_ = false;
    /** Speech rate multiplier. */
    this.speechRateMultiplier_ = 1.0;
    /**
     * The current state of the SelectToSpeak extension, from
     * SelectToSpeakState.
     */
    this.state_ = SelectToSpeakState.INACTIVE;
    /**
     * Whether the current nodes support use of the navigation panel.
     */
    this.supportsNavigationPanel_ = true;
    this.ttsManager_ = new TtsManager();
    this.uiManager_ = new UiManager(this.prefsManager_, /*listener=*/ this);
    this.onLoadDesktopCallbackForTest_ = null;
    // Kick off asynchronous initialization (desktop node, preferences,
    // listeners, context menu).
    this.init_();
  }
  /**
   * One-time asynchronous initialization: loads the desktop automation node,
   * preferences, content scripts, and input listeners, and installs the
   * "listen" context menu entry.
   */
  private async init_(): Promise<void> {
    chrome.automation.getDesktop(desktop => {
      this.desktop_ = desktop;
      // After the user selects a region of the screen, we do a hit test at
      // the center of that box using the automation API. The result of the
      // hit test is a MOUSE_RELEASED accessibility event.
      desktop.addEventListener(
          EventType.MOUSE_RELEASED, evt => this.onAutomationHitTest_(evt),
          true);
      // Chrome PDF Viewer with PDF OCR sends a layout complete event when
      // finishing extracting text from inaccessible PDF pages.
      desktop.addEventListener(
          EventType.LAYOUT_COMPLETE, evt => this.onLayoutComplete_(evt), true);
      if (this.onLoadDesktopCallbackForTest_) {
        this.onLoadDesktopCallbackForTest_();
        this.onLoadDesktopCallbackForTest_ = null;
      }
    });
    this.prefsManager_.initPreferences();
    this.runContentScripts_();
    this.setUpEventListeners_();
    // Flags must be loaded before choosing how to wire the context menu
    // click handler below.
    await Flags.init();
    const createArgs: chrome.contextMenus.CreateProperties = {
      title: chrome.i18n.getMessage(
          'select_to_speak_listen_context_menu_option_text'),
      contexts: [chrome.contextMenus.ContextType.SELECTION],
      id: 'select_to_speak',
    };
    // Under Manifest V3 the click handler is registered via the onClicked
    // listener; otherwise via the 'onclick' create property.
    if (Flags.isEnabled(FlagName.MANIFEST_V3)) {
      chrome.contextMenus.onClicked.addListener(() => {
        this.getFocusedNodeAndSpeakSelectedText_();
      });
    } else {
      createArgs['onclick'] = () => {
        this.getFocusedNodeAndSpeakSelectedText_();
      };
    }
    // Install the context menu in the Ash browser.
    await chrome.contextMenus.create(createArgs);
    // Listen for context menu clicks from other contexts (like Lacros).
    chrome.accessibilityPrivate.onSelectToSpeakContextMenuClicked.addListener(
        () => {
          this.getFocusedNodeAndSpeakSelectedText_();
        });
  }
/**
* Gets the node group currently being spoken.
*/
private getCurrentNodeGroup_(): ParagraphUtils.NodeGroup|undefined {
if (this.currentNodeGroups_.length === 0) {
return undefined;
}
return this.currentNodeGroups_[this.currentNodeGroupIndex_];
}
/**
* Determines if navigation controls should be shown (and other related
* functionality, such as auto-dismiss and click-to-navigate to sentence,
* should be activated) based on feature flag and user setting.
*/
private shouldShowNavigationControls_(): boolean {
return this.prefsManager_.navigationControlsEnabled() &&
this.supportsNavigationPanel_;
}
/**
* Read the status message under the status node in a PDF accessibility tree
* if PDF content is still being loaded. In the loading phase, the PDF a11y
* tree will have one child node with the banner role, which contains the
* loading status message as follows:
* pdfRoot
* - banner
* -- status
* --- staticText: "Loading PDF"
*/
private readPdfStatusNodeIfStillLoading_(pdfRoot: AutomationNode): boolean {
if (pdfRoot.role === RoleType.PDF_ROOT && pdfRoot.children.length === 1 &&
pdfRoot.firstChild!.role === RoleType.BANNER &&
pdfRoot.firstChild!.children.length === 1 &&
pdfRoot.firstChild!.firstChild!.role === RoleType.STATUS &&
pdfRoot.firstChild!.firstChild!.children.length === 1 &&
pdfRoot.firstChild!.firstChild!.firstChild!.role ===
RoleType.STATIC_TEXT) {
this.startSpeechQueue_([pdfRoot.firstChild!.firstChild!.firstChild!], {
clearFocusRing: true,
});
return true;
}
return false;
}
private onLayoutComplete_(evt: AutomationEvent): void {
const root: AutomationNode = evt.target;
if (!root.url || !root.url.endsWith('.pdf')) {
return;
}
// Check if it's Chrome PDF Viewer with PDF OCR in the full-page view.
const pdfRoot: AutomationNode|null = root.find({role: RoleType.PDF_ROOT});
if (!pdfRoot) {
return;
}
this.recordOcredPagesInPdf_(pdfRoot);
}
/**
* Record the number of OCRed pages in the PDF accessibility tree.
*/
private recordOcredPagesInPdf_(pdfRoot: AutomationNode): void {
// When PDF OCR successfully extracts text from inaccessible PDF pages, PDF
// pages with OCRed content will have the "ocred_page" class name.
const orcedPages = pdfRoot.findAll({attributes: {className: 'ocred_page'}});
MetricsUtils.recordNumPdfPagesOcred(orcedPages.length);
}
/**
* Called in response to our hit test after the mouse is released,
* when the user is in a mode where Select-to-speak is capturing
* mouse events (for example holding down Search).
* @param evt The automation event from the hit test.
*/
private onAutomationHitTest_(evt: AutomationEvent): void {
// Walk up to the nearest window, web area, toolbar, or dialog that the
// hit node is contained inside. Only speak objects within that
// container. In the future we might include other container-like
// roles here.
var root = evt.target;
// In Chrome PDF Viewer, PDF content for a large PDF might be still being
// loaded into a PDF accessibility tree when the user selects text on a PDF
// page. In this case, the PDF root has only one child node, which is the
// status node that contains a loading status message. Read this status
// message if the user tries selecting text during this loading phase.
if (root.role === RoleType.EMBEDDED_OBJECT && root.children.length === 1 &&
root.firstChild!.role === RoleType.PDF_ROOT &&
root.firstChild!.children.length === 1 &&
this.readPdfStatusNodeIfStillLoading_(root.firstChild!)) {
return;
}
// TODO: Use AutomationPredicate.root instead?
while (root.parent && root.role !== RoleType.WINDOW &&
root.role !== RoleType.ROOT_WEB_AREA &&
root.role !== RoleType.DESKTOP && root.role !== RoleType.DIALOG &&
root.role !== RoleType.ALERT_DIALOG &&
root.role !== RoleType.TOOLBAR) {
root = root.parent;
}
var rect = this.inputHandler_!.getMouseRect();
var nodes: AutomationNode[] = [];
chrome.automation.getFocus(focusedNode => {
// In some cases, e.g. ARC++, the window received in the hit test request,
// which is computed based on which window is the event handler for the
// hit point, isn't the part of the tree that contains the actual
// content. In such cases, use focus to get the root.
// TODO(katie): Determine if this work-around needs to be ARC++ only. If
// so, look for classname exoshell on the root or root parent to confirm
// that a node is in ARC++.
if (!NodeUtils.findAllMatching(root, rect, nodes) && focusedNode &&
focusedNode.root!.role !== RoleType.DESKTOP) {
// TODO(b/314203187): Determine if not null assertion is appropriate
// here.
NodeUtils.findAllMatching(focusedNode.root!, rect, nodes);
}
if (nodes.length === 1 && UiManager.isTrayButton(nodes[0])) {
// Don't read only the Select-to-Speak toggle button in the tray unless
// more items are being read.
return;
}
if (this.shouldShowNavigationControls_() && nodes.length > 0 &&
(rect.width <= SelectToSpeakConstants.PARAGRAPH_SELECTION_MAX_SIZE ||
rect.height <=
SelectToSpeakConstants.PARAGRAPH_SELECTION_MAX_SIZE)) {
// If this is a single click (zero sized selection) on a text node, then
// expand to entire paragraph.
nodes = NodeUtils.getAllNodesInParagraph(nodes[0]);
}
this.startSpeechQueue_(nodes, {
clearFocusRing: true,
});
MetricsUtils.recordStartEvent(
MetricsUtils.StartSpeechMethod.MOUSE, this.prefsManager_);
});
}
private getFocusedNodeAndSpeakSelectedText_(): void {
chrome.automation.getFocus(
focusedNode => this.requestSpeakSelectedText_(
MetricsUtils.StartSpeechMethod.CONTEXT_MENU, focusedNode));
}
  /**
   * Queues up selected text for reading by finding the Position objects
   * representing the selection.
   * @param method The method that caused the text to speak (keystroke or
   *     context menu); used for metrics.
   * @param focusedNode The node that currently has focus; its root is used
   *     to locate the selection.
   */
  private requestSpeakSelectedText_(
      method: MetricsUtils.StartSpeechMethod,
      focusedNode: AutomationNode): void {
    // If nothing is selected, return early. Check if the focused node has
    // textSelStart and textSelEnd. For native UI like the omnibox, the root
    // might not have a selectionStartObject and selectionEndObject.
    // Therefore we must check textSelStart and textSelEnd on the focused
    // node.
    if (!focusedNode || !focusedNode.root) {
      this.onNullSelection_();
      return;
    }
    const hasSelectionObjects = focusedNode.root.selectionStartObject &&
        focusedNode.root.selectionEndObject;
    const hasTextSelection = focusedNode.textSelStart !== undefined &&
        focusedNode.textSelEnd !== undefined;
    if (!hasSelectionObjects && !hasTextSelection) {
      this.onNullSelection_();
      return;
    }
    let startObject;
    let startOffset = 0;
    let endObject;
    let endOffset = 0;
    // Use selectionStartObject/selectionEndObject if available. Otherwise,
    // use textSelStart/textSelEnd to get the selection offset.
    if (hasSelectionObjects) {
      startObject = focusedNode.root.selectionStartObject;
      startOffset = focusedNode.root.selectionStartOffset || 0;
      endObject = focusedNode.root.selectionEndObject;
      endOffset = focusedNode.root.selectionEndOffset || 0;
    } else if (hasTextSelection) {
      startObject = focusedNode;
      startOffset = focusedNode.textSelStart || 0;
      endObject = focusedNode;
      endOffset = focusedNode.textSelEnd || 0;
    }
    // A collapsed selection (same object, same offset) is an empty one.
    if (startObject === endObject && startOffset === endOffset) {
      this.onNullSelection_();
      return;
    }
    // First calculate the equivalent position for this selection.
    // Sometimes the automation selection returns an offset into a root
    // node rather than a child node, which may be a bug. This allows us to
    // work around that bug until it is fixed or redefined.
    // Note that this calculation is imperfect: it uses node name length
    // to index into child nodes. However, not all node names are
    // user-visible text, so this does not always work. Instead, we must
    // fix the Blink bug where focus offset is not specific enough to
    // say which node is selected and at what charOffset. See
    // https://crbug.com/803160 for more.
    const startPosition = NodeUtils.getDeepEquivalentForSelection(
        startObject!, startOffset, true);
    const endPosition =
        NodeUtils.getDeepEquivalentForSelection(endObject!, endOffset, false);
    // TODO(katie): We go into these blocks but they feel redundant. Can
    // there be another way to do this?
    let firstPosition;
    let lastPosition;
    if (startPosition.node === endPosition.node) {
      // Same node: order the two positions by character offset.
      if (startPosition.offset < endPosition.offset) {
        firstPosition = startPosition;
        lastPosition = endPosition;
      } else {
        lastPosition = startPosition;
        firstPosition = endPosition;
      }
    } else {
      const dir =
          AutomationUtil.getDirection(startPosition.node, endPosition.node);
      // Highlighting may be forwards or backwards. Make sure we start at the
      // first node.
      if (dir === constants.Dir.FORWARD) {
        firstPosition = startPosition;
        lastPosition = endPosition;
      } else {
        lastPosition = startPosition;
        firstPosition = endPosition;
      }
    }
    this.cancelIfSpeaking_(true /* clear the focus ring */);
    this.readNodesBetweenPositions_(
        firstPosition, lastPosition, method, focusedNode);
  }
  /**
   * Reads nodes between positions.
   * @param firstPosition The first position at which to start reading.
   * @param lastPosition The last position at which to stop reading.
   * @param method The method used to activate the speech, null if not
   *     activated by the user.
   * @param focusedNode The node with user focus.
   */
  private readNodesBetweenPositions_(
      firstPosition: NodeUtils.Position, lastPosition: NodeUtils.Position,
      method: MetricsUtils.StartSpeechMethod|null,
      focusedNode: AutomationNode|undefined): void {
    const nodes = [];
    // TODO(b/314204374): AutomationUtil.findNextNode may return null.
    let selectedNode: AutomationNode|null = firstPosition.node;
    // If the method is set, a user requested the speech.
    const userRequested = method !== null;
    const methodNumber: number = method !== null ? method : -1;
    // Certain nodes such as omnibox store text value in the value property,
    // instead of the name property. The getNodeName method in ParagraphUtils
    // does handle this case properly, so use this static method to get text
    // from either `name' or `value' of the node.
    const nodeName = ParagraphUtils.getNodeName(selectedNode);
    if (nodeName && firstPosition.offset < nodeName.length &&
        !NodeUtils.shouldIgnoreNode(
            selectedNode, /* include offscreen */ true) &&
        !NodeUtils.isNotSelectable(selectedNode)) {
      // Initialize to the first node in the list if it's valid and inside
      // of the offset bounds.
      nodes.push(selectedNode);
    } else {
      // The selectedNode actually has no content selected. Let the list
      // initialize itself to the next node in the loop below.
      // This can happen if you click-and-drag starting after the text in
      // a first line to highlight text in a second line.
      firstPosition.offset = 0;
    }
    // Walk forward from the first node, collecting speakable nodes until the
    // node at the last position is reached (or passed).
    while (selectedNode && selectedNode !== lastPosition.node &&
           AutomationUtil.getDirection(selectedNode, lastPosition.node) ===
               constants.Dir.FORWARD) {
      // TODO: Is there a way to optimize the directionality checking of
      // AutomationUtil.getDirection(selectedNode, finalNode)?
      // For example, by making a helper and storing partial computation?
      selectedNode = AutomationUtil.findNextNode(
          selectedNode, constants.Dir.FORWARD,
          AutomationPredicate.leafWithText);
      if (!selectedNode) {
        break;
      } else if (NodeUtils.isTextField(selectedNode)) {
        // Dive down into the next text node.
        // Why does leafWithText return text fields?
        selectedNode = AutomationUtil.findNextNode(
            selectedNode, constants.Dir.FORWARD,
            AutomationPredicate.leafWithText);
        if (!selectedNode) {
          break;
        }
      }
      if (!NodeUtils.shouldIgnoreNode(
              selectedNode, /* include offscreen */ true) &&
          !NodeUtils.isNotSelectable(selectedNode)) {
        nodes.push(selectedNode);
      }
    }
    if (nodes.length > 0) {
      if (lastPosition.node !== nodes[nodes.length - 1]) {
        // The node at the last position was not added to the list, perhaps
        // it was whitespace or invisible. Clear the ending offset because it
        // relates to a node that doesn't exist.
        this.startSpeechQueue_(nodes, {
          clearFocusRing: userRequested,
          startCharIndex: firstPosition.offset,
        });
      } else {
        this.startSpeechQueue_(nodes, {
          clearFocusRing: userRequested,
          startCharIndex: firstPosition.offset,
          endCharIndex: lastPosition.offset,
        });
      }
      if (focusedNode) {
        // Keep spoken content scrolled into view until the user scrolls
        // manually.
        this.initializeScrollingToOffscreenNodes_(focusedNode.root);
      }
      if (userRequested) {
        MetricsUtils.recordStartEvent(methodNumber, this.prefsManager_);
      }
    } else {
      // Gsuite apps include webapps beyond Docs, see getGSuiteAppRoot and
      // GSUITE_APP_REGEXP. These apps need the clipboard to find and read
      // the selected text, since it is not available via the nodes above.
      const gsuiteAppRootNode = getGSuiteAppRoot(focusedNode);
      if (!gsuiteAppRootNode) {
        return;
      }
      chrome.tabs.query({active: true}, tabs => {
        // Closure doesn't realize that we did a !gsuiteAppRootNode earlier
        // so we check again here.
        if (!gsuiteAppRootNode || gsuiteAppRootNode.url === undefined) {
          return;
        }
        this.inputHandler_!.onRequestReadClipboardData();
        this.currentNodeGroupItem_ =
            new ParagraphUtils.NodeGroupItem(gsuiteAppRootNode, 0, false);
        if (tabs.length > 0 && tabs[0].url === gsuiteAppRootNode.url) {
          const tab = tabs[0];
          // Trigger a copy in the page; the clipboard data is then read via
          // the InputHandler (see onTextReceived in setUpEventListeners_).
          chrome.tabs.executeScript(tab.id, {
            allFrames: true,
            matchAboutBlank: true,
            code: 'document.execCommand("copy");',
          });
        } else {
          // In Lacros because chrome.tabs didn't return a tab or it
          // was a tab with a different URL.
          chrome.accessibilityPrivate.clipboardCopyInActiveLacrosGoogleDoc(
              gsuiteAppRootNode.url);
        }
        if (userRequested) {
          MetricsUtils.recordStartEvent(methodNumber, this.prefsManager_);
        }
      });
    }
  }
/**
* Gets ready to cancel future scrolling to offscreen nodes as soon as
* a user-initiated scroll is done.
* @param root The root node to listen for events on.
*/
private initializeScrollingToOffscreenNodes_(root: AutomationNode|
undefined): void {
if (!root) {
return;
}
this.scrollToSpokenNode_ = true;
const listener = (event: chrome.automation.AutomationEvent): void => {
if (event.eventFrom !== 'action') {
// User initiated event. Cancel all future scrolling to spoken nodes.
// If the user wants a certain scroll position we will respect that.
this.scrollToSpokenNode_ = false;
// Now remove these event listeners, we no longer need them.
root.removeEventListener(
EventType.SCROLL_POSITION_CHANGED, listener, false);
root.removeEventListener(
EventType.SCROLL_HORIZONTAL_POSITION_CHANGED, listener, false);
root.removeEventListener(
EventType.SCROLL_VERTICAL_POSITION_CHANGED, listener, false);
}
};
// ARC++ fires the first event, Views/Web fire the horizontal/vertical
// scroll position changed events via AXEventGenerator.
root.addEventListener(EventType.SCROLL_POSITION_CHANGED, listener, false);
root.addEventListener(
EventType.SCROLL_HORIZONTAL_POSITION_CHANGED, listener, false);
root.addEventListener(
EventType.SCROLL_VERTICAL_POSITION_CHANGED, listener, false);
}
/**
* Plays a tone to let the user know they did the correct
* keystroke but nothing was selected.
*/
private onNullSelection_(): void {
if (!this.shouldShowNavigationControls_()) {
this.nullSelectionTone_.play();
return;
}
this.uiManager_.setFocusToPanel();
}
/**
* Whether the STS is on a pause state, where |this.ttsManager_.isSpeaking| is
* false and |this.state_| is SPEAKING.
* TODO(leileilei): use two SelectToSpeak states to differentiate speaking and
* pausing with panel.
*/
private isPaused_(): boolean {
return !this.ttsManager_.isSpeaking() &&
this.state_ === SelectToSpeakState.SPEAKING;
}
  /**
   * Pause the TTS.
   * @return A promise that resolves once the pause request has been handled
   *     by the TtsManager.
   */
  private pause_(): Promise<any> {
    return this.ttsManager_.pause();
  }
/**
* Resume the TTS.
*/
private resume_(): void {
// If TTS is not paused, return early.
if (!this.isPaused_()) {
return;
}
const currentNodeGroup = this.getCurrentNodeGroup_();
// If there is no processed node group, that means the user has not selected
// anything. Ignore the resume command.
if (!currentNodeGroup) {
return;
}
this.ttsManager_.resume(this.getTtsOptionsForCurrentNodeGroup_());
}
/**
* If resume is successful, a resume event will be sent. We use this event to
* update node state.
*/
private onTtsResumeSucceedEvent_(event: chrome.tts.TtsEvent): void {
// If the node group is invalid, ignore the resume event. This is not
// expected.
const currentNodeGroup = this.getCurrentNodeGroup_();
if (!currentNodeGroup) {
console.warn('Unexpected invalid node group on TTS resume event.');
return;
}
this.onTtsWordEvent_(event, currentNodeGroup);
}
/**
* When resuming with empty content, an error event will be sent. If there
* is no remaining user-selected content, STS will read from the current
* position to the end of the current paragraph. If there is no content left
* in this paragraph, we navigate to the next paragraph.
*/
private onTtsResumeErrorEvent_(_event: chrome.tts.TtsEvent): void {
// If the node group is invalid, ignore the error event. This is not
// expected.
const currentNodeGroup = this.getCurrentNodeGroup_();
if (!currentNodeGroup) {
console.warn(
'Unexpected invalid node group on TTS error event when resuming.');
return;
}
// STS should try to read from the current position to the end of the
// current paragraph. First, we get the current position. If we do not find
// a position based on the |this.currentCharIndex_|, that means we have
// reached the end of current node group. We fallback to the end position.
const currentPosition = NodeUtils.getPositionFromNodeGroup(
currentNodeGroup, this.currentCharIndex_, true /* fallbackToEnd */);
// If we have passed the user-selected content, STS should speak the content
// from the current position to the end of the current node group.
const {nodes: remainingNodes, offset} =
NodeNavigationUtils.getNextNodesInParagraphFromPosition(
currentPosition, constants.Dir.FORWARD);
// If there is no remaining nodes in this paragraph, we navigate to the next
// paragraph.
if (remainingNodes.length === 0) {
this.navigateToNextParagraph_(constants.Dir.FORWARD);
return;
}
this.startSpeechQueue_(remainingNodes, {
clearFocusRing: false,
startCharIndex: offset,
});
}
  /**
   * Stop speech. If speech was in-progress, the interruption
   * event will be caught and clearFocusRingAndNode_ will be
   * called, stopping visual feedback as well.
   * If speech was not in progress, i.e. if the user was drawing
   * a focus ring on the screen, this still clears the visual
   * focus ring.
   */
  private stopAll_(): void {
    this.ttsManager_.stop();
    this.uiManager_.clear();
    // Transition back to INACTIVE; state bookkeeping happens in
    // onStateChanged_.
    this.onStateChanged_(SelectToSpeakState.INACTIVE);
  }
/**
* Clears the current focus ring and node, but does
* not stop the speech.
*/
private clearFocusRingAndNode_(): void {
this.uiManager_.clear();
// Clear the node and also stop the interval testing.
this.resetNodes_();
this.supportsNavigationPanel_ = true;
if (this.intervalId_ !== undefined) {
clearInterval(this.intervalId_);
this.intervalId_ = undefined;
}
this.scrollToSpokenNode_ = false;
}
/**
* Resets the instance variables for nodes and node groups.
*/
private resetNodes_(): void {
this.currentNodeGroups_ = [];
this.currentNodeGroupIndex_ = -1;
this.currentNodeGroupItem_ = null;
this.currentNodeGroupItemIndex_ = -1;
this.currentNodeWord_ = null;
this.currentCharIndex_ = -1;
}
/**
* Runs content scripts that allow Select-to-Speak access to
* Google Docs content without a11y mode enabled, in every open
* tab. Should be run when Select-to-Speak starts up so that any
* tabs already opened will be checked.
* This should be kept in sync with the "content_scripts" section in
* the Select-to-Speak manifest.
*/
private runContentScripts_(): void {
const scripts = chrome.runtime.getManifest()['content_scripts'][0]['js'];
// We only ever expect one content script.
if (scripts.length !== 1) {
throw new Error(
'Only expected one script; got ' + JSON.stringify(scripts));
}
const script = scripts[0];
chrome.tabs.query(
{
url: [
'https://docs.google.com/document*',
'https://docs.sandbox.google.com/*',
],
},
tabs => {
tabs.forEach(tab => {
chrome.tabs.executeScript(tab.id, {file: script});
});
});
}
  /**
   * Sets up the InputHandler and the callbacks through which it reports user
   * input (mouse selection, keystroke selection, cancel requests, and
   * clipboard text), then publishes the initial INACTIVE state.
   */
  private setUpEventListeners_(): void {
    this.inputHandler_ = new InputHandler({
      // canStartSelecting: Whether mouse selection can begin.
      canStartSelecting: () => {
        return this.state_ !== SelectToSpeakState.SELECTING;
      },
      // onSelectingStateChanged: Started or stopped mouse selection.
      onSelectingStateChanged: (isSelecting, x, y) => {
        if (isSelecting) {
          this.onStateChanged_(SelectToSpeakState.SELECTING);
          // Fire a hit test event on click to warm up the cache, and cancel
          // if speaking.
          this.cancelIfSpeaking_(false /* don't clear the focus ring */);
          this.desktop_!.hitTest(x, y, EventType.MOUSE_PRESSED);
        } else {
          this.onStateChanged_(SelectToSpeakState.INACTIVE);
          // Do a hit test at the center of the area the user dragged over.
          // This will give us some context when searching the accessibility
          // tree. The hit test will result in a EventType.MOUSE_RELEASED
          // event being fired on the result of that hit test, which will
          // trigger onAutomationHitTest_.
          this.desktop_!.hitTest(x, y, EventType.MOUSE_RELEASED);
        }
      },
      // onSelectionChanged: Mouse selection rect changed.
      onSelectionChanged: rect => {
        this.uiManager_.setSelectionRect(rect);
      },
      // onKeystrokeSelection: Keys pressed for reading highlighted text.
      onKeystrokeSelection: () => {
        chrome.automation.getFocus(
            focusedNode => this.requestSpeakSelectedText_(
                MetricsUtils.StartSpeechMethod.KEYSTROKE, focusedNode));
      },
      // onRequestCancel: User requested canceling input/speech.
      onRequestCancel: () => {
        // User manually requested cancel, so log cancel metric.
        MetricsUtils.recordCancelIfSpeaking();
        this.cancelIfSpeaking_(true /* clear the focus ring */);
      },
      // onTextReceived: Text received from a 'paste' event to read aloud.
      onTextReceived: text => this.startSpeech_(text),
    });
    this.inputHandler_.setUpEventListeners();
    // Initialize the state to SelectToSpeakState.INACTIVE.
    chrome.accessibilityPrivate.setSelectToSpeakState(this.state_);
  }
/**
* Called when Chrome OS is requesting Select-to-Speak to switch states.
*/
onStateChangeRequested(): void {
// Switch Select-to-Speak states on request.
// We will need to track the current state and toggle from one state to
// the next when this function is called, and then call
// accessibilityPrivate.setSelectToSpeakState with the new state.
switch (this.state_) {
case SelectToSpeakState.INACTIVE:
// Start selection.
this.inputHandler_!.setTrackingMouse(true);
this.onStateChanged_(SelectToSpeakState.SELECTING);
MetricsUtils.recordSelectToSpeakStateChangeEvent(
MetricsUtils.StateChangeEvent.START_SELECTION);
break;
case SelectToSpeakState.SPEAKING:
// Stop speaking. User manually requested, so log cancel metric.
MetricsUtils.recordCancelIfSpeaking();
this.cancelIfSpeaking_(true /* clear the focus ring */);
MetricsUtils.recordSelectToSpeakStateChangeEvent(
MetricsUtils.StateChangeEvent.CANCEL_SPEECH);
break;
case SelectToSpeakState.SELECTING:
// Cancelled selection.
this.inputHandler_!.setTrackingMouse(false);
this.onStateChanged_(SelectToSpeakState.INACTIVE);
MetricsUtils.recordSelectToSpeakStateChangeEvent(
MetricsUtils.StateChangeEvent.CANCEL_SELECTION);
}
this.onStateChangeRequestedCallbackForTest_ &&
this.onStateChangeRequestedCallbackForTest_();
}
  /** Handles user request to navigate to the next paragraph. */
  onNextParagraphRequested(): void {
    this.navigateToNextParagraph_(constants.Dir.FORWARD);
  }
  /** Handles user request to navigate to the previous paragraph. */
  onPreviousParagraphRequested(): void {
    this.navigateToNextParagraph_(constants.Dir.BACKWARD);
  }
  /** Handles user request to navigate to the next sentence. */
  onNextSentenceRequested(): void {
    this.navigateToNextSentence_(constants.Dir.FORWARD);
  }
  /** Handles user request to navigate to the previous sentence. */
  onPreviousSentenceRequested(): void {
    this.navigateToNextSentence_(constants.Dir.BACKWARD);
  }
  /** Handles user request to exit Select-to-Speak. */
  onExitRequested(): void {
    // User manually requested, so log cancel metric.
    MetricsUtils.recordCancelIfSpeaking();
    this.stopAll_();
  }
  /** Handles user request to pause TTS. */
  onPauseRequested(): void {
    // User-initiated pause; record the pause metric.
    MetricsUtils.recordPauseEvent();
    this.pause_();
  }
/** Handles user request to resume TTS. */
onResumeRequested(): void {
if (this.isPaused_()) {
MetricsUtils.recordResumeEvent();
this.resume_();
}
}
/**
* Handles user request to adjust reading speed.
*/
onChangeSpeedRequested(rateMultiplier: number): void {
this.speechRateMultiplier_ = rateMultiplier;
// If currently playing, stop TTS, then resume from current spot.
if (!this.isPaused_()) {
this.pause_().then(() => {
this.resume_();
});
}
}
/**
 * Moves speech to the adjacent sentence.
 * @param direction If forward, seeks the sentence start after the current
 *     position; if backward, seeks the sentence start before it.
 */
private async navigateToNextSentence_(direction: constants.Dir):
    Promise<void> {
  if (!this.isPaused_()) {
    await this.pause_();
  }
  const result = NodeNavigationUtils.getNodesForNextSentence(
      this.getCurrentNodeGroup_(), this.currentCharIndex_, direction,
      candidates => this.skipPanel_(candidates));
  if (result.nodes.length === 0) {
    return;
  }
  // Scroll the first node of the target sentence into view before speaking.
  result.nodes[0].makeVisible();
  this.startSpeechQueue_(result.nodes, {
    startCharIndex: result.offset,
  });
}
/**
 * Moves speech to the adjacent text block in the given direction.
 */
private async navigateToNextParagraph_(direction: constants.Dir):
    Promise<void> {
  if (!this.isPaused_()) {
    // Halt any in-progress speech before navigating.
    await this.pause_();
  }
  const paragraphNodes = NodeNavigationUtils.getNodesForNextParagraph(
      this.getCurrentNodeGroup_(), direction,
      candidates => this.skipPanel_(candidates));
  if (paragraphNodes.length === 0) {
    // No paragraph found in that direction.
    return;
  }
  // Scroll the paragraph's first node into view before speaking.
  paragraphNodes[0].makeVisible();
  this.startSpeechQueue_(paragraphNodes);
}
/**
 * Predicate used during paragraph selection/navigation: rejects paragraphs
 * that live inside the Select-to-speak panel itself.
 * @return Whether the paragraph made of |nodes| is valid.
 */
private skipPanel_(nodes: AutomationNode[]): boolean {
  // Valid only if no ancestor of the first node is the STS panel.
  return AutomationUtil.getAncestors(nodes[0]).every(
      n => !UiManager.isPanel(n));
}
/**
 * Speaks a single plain string. The string has no associated nodes, so this
 * does no focus-ring work, unlike startSpeechQueue_ below.
 * @param text The text to speak.
 */
private startSpeech_(text: string): void {
  this.prepareForSpeech_(true /* clearFocusRing */);
  this.maybeShowEnhancedVoicesDialog_(() => {
    const options = this.prefsManager_.getSpeechOptions(null);
    const fallbackVoiceName = this.prefsManager_.getLocalVoice();
    // With no nodes to anchor on, the navigation panel is unsupported.
    this.supportsNavigationPanel_ = false;
    options.onEvent = event => {
      switch (event.type) {
        case 'start':
          this.onStateChanged_(SelectToSpeakState.SPEAKING);
          this.updateUi_();
          break;
        case 'end':
        case 'interrupted':
        case 'cancelled':
          // Automatically dismiss when we're at the end.
          this.onStateChanged_(SelectToSpeakState.INACTIVE);
          break;
        default:
          break;
      }
    };
    const voiceName = options['voiceName'] || '';
    MetricsUtils.recordTtsEngineUsed(voiceName, this.prefsManager_);
    this.ttsManager_.speak(
        text, options, this.prefsManager_.isNetworkVoice(voiceName),
        fallbackVoiceName);
  });
}
/**
 * Enqueues the given nodes for TTS and begins speaking them. Used both for
 * user selections (e.g., mouse selection) and navigation controls (e.g.,
 * next paragraph).
 * @param nodes The nodes to speak.
 * @param optParams:
 *    clearFocusRing: Whether to clear the focus ring. Defaults to false;
 * e.g., the ring is kept when resuming from a previous pause but cleared
 * when starting from scratch.
 *    startCharIndex: Index into the first node's text at which to start
 * speaking. Defaults to 0.
 *    endCharIndex: Index into the last node's text at which to stop.
 * Defaults to the end of the text.
 */
private startSpeechQueue_(nodes: AutomationNode[], optParams?: {
  clearFocusRing?: boolean,
  startCharIndex?: number,
  endCharIndex?: number,
}): void {
  this.maybeShowEnhancedVoicesDialog_(() => {
    let {clearFocusRing = false, startCharIndex, endCharIndex} =
        optParams ?? {};
    this.prepareForSpeech_(clearFocusRing /* clear the focus ring */);
    if (nodes.length === 0) {
      return;
    }
    // |startCharIndex| and |endCharIndex| refer to the original first and
    // last nodes. If SVG sorting below reorders either endpoint, drop the
    // corresponding clip rather than applying it to the wrong node.
    const firstBeforeSort = nodes[0];
    const lastBeforeSort = nodes[nodes.length - 1];
    // Sort any SVG child nodes, if present, by visual reading order.
    NodeUtils.sortSvgNodesByReadingOrder(nodes);
    if (firstBeforeSort !== nodes[0]) {
      startCharIndex = undefined;
    }
    if (lastBeforeSort !== nodes[nodes.length - 1]) {
      endCharIndex = undefined;
    }
    this.supportsNavigationPanel_ = this.isNavigationPanelSupported_(nodes);
    this.updateNodeGroups_(nodes, startCharIndex, endCharIndex);
    // Play TTS according to the current state variables.
    this.startCurrentNodeGroup_();
  });
}
/**
 * Updates the node groups to be spoken. Converts |nodes|, |startCharIndex|,
 * and |endCharIndex| into node groups, and updates |this.currentNodeGroups_|
 * and |this.currentNodeGroupIndex_|.
 * @param nodes The nodes to speak.
 * @param startCharIndex The index into the first node's text at
 *     which to start speaking. If this is not passed, will start at 0.
 * @param endCharIndex The index into the last node's text at which
 *     to end speech. If this is not passed, will stop at the end.
 */
private updateNodeGroups_(
    nodes: AutomationNode[], startCharIndex?: number,
    endCharIndex?: number): void {
  // Discard any previously queued groups before building new ones.
  this.resetNodes_();
  for (let i = 0; i < nodes.length; i++) {
    // When navigation controls are enabled, disable the clipping of overflow
    // words. When overflow words are clipped, words scrolled out of view are
    // clipped, which is undesirable for our navigation features as we
    // generate node groups for next/previous paragraphs which may be fully or
    // partially scrolled out of view.
    const nodeGroup = ParagraphUtils.buildNodeGroup(nodes, i, {
      splitOnLanguage: this.shouldUseVoiceSwitching_(),
      clipOverflowWords: !this.shouldShowNavigationControls_(),
    });
    // The caller's start offset only ever applies to the very first group.
    const isFirstNodeGroup = i === 0;
    const shouldApplyStartOffset =
        isFirstNodeGroup && startCharIndex !== undefined;
    const firstNodeHasInlineText =
        nodeGroup.nodes.length > 0 && nodeGroup.nodes[0].hasInlineText;
    if (shouldApplyStartOffset) {
      let startIndexInNodeGroup;
      if (firstNodeHasInlineText) {
        // We assume that the start offset will only be applied to the first
        // node in the first NodeGroup. The |startCharIndex| needs to be
        // adjusted. The first node of the NodeGroup may not be at the
        // beginning of the parent of the NodeGroup. (e.g., an inlineText in
        // its staticText parent). Thus, we need to adjust the start index.
        const startIndexInNodeParent =
            ParagraphUtils.getStartCharIndexInParent(nodes[0]);
        startIndexInNodeGroup = startCharIndex + startIndexInNodeParent +
            nodeGroup.nodes[0].startChar;
      } else {
        // Text field such as omnibox doesn't have inline text, but text in
        // the value property. In case the user selects some text within, we
        // need to adjust |startCharIndex| accordingly.
        startIndexInNodeGroup = startCharIndex + nodeGroup.nodes[0].startChar;
      }
      this.applyOffset(
          nodeGroup, startIndexInNodeGroup, true /* isStartOffset */);
    }
    // Advance i to the end of this group, to skip all nodes it contains.
    i = nodeGroup.endIndex;
    const isLastNodeGroup = (i === nodes.length - 1);
    const shouldApplyEndOffset =
        isLastNodeGroup && endCharIndex !== undefined;
    const lastNodeHasInlineText = nodeGroup.nodes.length > 0 &&
        nodeGroup.nodes[nodeGroup.nodes.length - 1].hasInlineText;
    if (shouldApplyEndOffset) {
      let endIndexInNodeGroup;
      if (lastNodeHasInlineText) {
        // We assume that the end offset will only be applied to the last
        // node in the last NodeGroup. Similarly, |endCharIndex| needs to be
        // adjusted.
        const startIndexInNodeParent =
            ParagraphUtils.getStartCharIndexInParent(nodes[i]);
        endIndexInNodeGroup = endCharIndex + startIndexInNodeParent +
            nodeGroup.nodes[nodeGroup.nodes.length - 1].startChar;
      } else {
        // Text field such as omnibox doesn't have inline text, but text in
        // the value property. In case the user selects some text within, we
        // need to adjust |endCharIndex| accordingly.
        endIndexInNodeGroup = endCharIndex +
            nodeGroup.nodes[nodeGroup.nodes.length - 1].startChar;
      }
      this.applyOffset(
          nodeGroup, endIndexInNodeGroup, false /* isStartOffset */);
    }
    // Skip empty groups, except an empty final group which is still pushed
    // (presumably so the queue terminates normally — verify with callers).
    if (nodeGroup.nodes.length === 0 && !isLastNodeGroup) {
      continue;
    }
    this.currentNodeGroups_.push(nodeGroup);
  }
  // Sets the initial node group index to zero if this.currentNodeGroups_ has
  // items.
  if (this.currentNodeGroups_.length > 0) {
    this.currentNodeGroupIndex_ = 0;
  }
}
/**
 * Starts reading the current node group, or advances past it if it contains
 * no text.
 */
private startCurrentNodeGroup_(): void {
  const group = this.getCurrentNodeGroup_();
  if (!group) {
    return;
  }
  if (!group.text) {
    // Nothing speakable in this group; treat it as already finished.
    this.onNodeGroupSpeakingCompleted_();
    return;
  }
  const options = this.getTtsOptionsForCurrentNodeGroup_();
  const voiceName = (options && options['voiceName']) || '';
  const fallbackVoiceName = this.prefsManager_.getLocalVoice();
  MetricsUtils.recordTtsEngineUsed(voiceName, this.prefsManager_);
  this.ttsManager_.speak(
      // TODO(b/314203187): Options may be undefined.
      group.text, options!, this.prefsManager_.isNetworkVoice(voiceName),
      fallbackVoiceName);
}
/**
 * Builds the chrome.tts options for the current node group: speech options
 * from prefs (with optional language for voice switching), a rate override
 * when navigation controls are shown, and an onEvent handler that drives STS
 * state, node syncing, and word highlighting.
 * @return The options, or undefined if there is no current node group.
 */
private getTtsOptionsForCurrentNodeGroup_(): chrome.tts.TtsOptions|undefined {
  const nodeGroup = this.getCurrentNodeGroup_();
  if (!nodeGroup) {
    return;
  }
  const options: chrome.tts.TtsOptions = {};
  let language;
  let useVoiceSwitching = false;
  // Only switch voices when the feature is enabled and a language was
  // detected for this group.
  if (this.shouldUseVoiceSwitching_() && nodeGroup.detectedLanguage) {
    language = nodeGroup.detectedLanguage;
    useVoiceSwitching = true;
  }
  Object.assign(
      options,
      this.prefsManager_.getSpeechOptions({language, useVoiceSwitching}));
  if (this.shouldShowNavigationControls_()) {
    options.rate = this.getSpeechRate_();
    // Log speech rate multiple applied by Select-to-speak.
    MetricsUtils.recordSpeechRateOverrideMultiplier(
        this.speechRateMultiplier_);
  }
  // Capture the text now; the handler below runs asynchronously.
  const nodeGroupText = nodeGroup.text || '';
  options.onEvent = (event: chrome.tts.TtsEvent) => {
    switch (event.type) {
      case chrome.tts.EventType.START:
        if (nodeGroup.nodes.length <= 0) {
          break;
        }
        this.onStateChanged_(SelectToSpeakState.SPEAKING);
        // Update |this.currentCharIndex_|. Find the first non-space char
        // index in nodeGroup text, or 0 if the text is undefined or the first
        // char is non-space.
        this.currentCharIndex_ = nodeGroupText.search(/\S|$/);
        this.syncCurrentNodeWithCharIndex_(nodeGroup, this.currentCharIndex_);
        if (this.prefsManager_.wordHighlightingEnabled()) {
          // At start, find the first word and highlight that. Clear the
          // previous word in the node.
          this.currentNodeWord_ = null;
          // If |this.currentCharIndex_| is not 0, that means we have applied
          // a start offset. Thus, we need to pass startIndexInNodeGroup to
          // optStartIndex and overwrite the word boundaries in the original
          // node.
          this.updateNodeHighlight_(
              nodeGroupText, this.currentCharIndex_,
              this.currentCharIndex_ !== 0 ? this.currentCharIndex_ :
                                             undefined);
        } else {
          this.updateUi_();
        }
        break;
      case chrome.tts.EventType.RESUME:
        this.onTtsResumeSucceedEvent_(event);
        break;
      case chrome.tts.EventType.ERROR:
        // Only resume-with-empty-content errors get special handling.
        if (event.errorMessage ===
            TtsManager.ErrorMessage.RESUME_WITH_EMPTY_CONTENT) {
          this.onTtsResumeErrorEvent_(event);
        }
        break;
      // @ts-expect-error: Fallthrough on purpose.
      case chrome.tts.EventType.PAUSE:
        // Updates the select to speak state to speaking to keep navigation
        // panel visible, so that the user can click resume from the panel.
        this.onStateChanged_(SelectToSpeakState.SPEAKING);
        // Fall through.
      case chrome.tts.EventType.INTERRUPTED:
      case chrome.tts.EventType.CANCELLED:
        if (!this.shouldShowNavigationControls_()) {
          this.onStateChanged_(SelectToSpeakState.INACTIVE);
          break;
        }
        if (this.state_ === SelectToSpeakState.SELECTING) {
          // Do not go into inactive state if navigation controls are enabled
          // and we're currently making a new selection. This enables users
          // to select new nodes while STS is active without first exiting.
          break;
        }
        break;
      case chrome.tts.EventType.END:
        this.onNodeGroupSpeakingCompleted_();
        break;
      case chrome.tts.EventType.WORD:
        this.onTtsWordEvent_(event, nodeGroup);
        break;
      default:
        break;
    }
  };
  return options;
}
/**
 * Called when a node group finishes speaking. Advances to the next group if
 * one remains; otherwise updates the STS state based on whether navigation
 * features are enabled.
 */
private onNodeGroupSpeakingCompleted_(): void {
  const group = this.getCurrentNodeGroup_();
  // Move the char index to the end of the finished group: its endOffset if
  // set, otherwise the full length of its text.
  if (group?.endOffset !== undefined) {
    this.currentCharIndex_ = group.endOffset;
  } else {
    this.currentCharIndex_ = ((group && group.text) || '').length;
  }
  const isLastNodeGroup =
      this.currentNodeGroupIndex_ === this.currentNodeGroups_.length - 1;
  if (!isLastNodeGroup) {
    // More groups queued; advance and keep speaking.
    this.currentNodeGroupIndex_++;
    this.startCurrentNodeGroup_();
    return;
  }
  if (this.shouldShowNavigationControls_()) {
    // Keep STS in the speaking state so the user can hit resume to continue.
    this.onStateChanged_(SelectToSpeakState.SPEAKING);
  } else {
    this.onStateChanged_(SelectToSpeakState.INACTIVE);
  }
}
/**
 * Update |this.currentNodeGroupItem_|, the current speaking or the node to be
 * spoken in the node group.
 * @param nodeGroup the current nodeGroup.
 * @param charIndex the start char index of the word to be spoken.
 *     The index is relative to the entire NodeGroup.
 * @param optStartFromNodeGroupIndex the NodeGroupIndex to start
 *     with. If undefined, search from 0.
 * @return Whether the found NodeGroupIndex is different from the
 *     |optStartFromNodeGroupIndex|.
 */
private syncCurrentNodeWithCharIndex_(
    nodeGroup: ParagraphUtils.NodeGroup, charIndex: number,
    optStartFromNodeGroupIndex?: number): boolean {
  if (optStartFromNodeGroupIndex === undefined) {
    optStartFromNodeGroupIndex = 0;
  }
  // There is no speaking word, set the NodeGroupItemIndex to 0.
  if (charIndex <= 0) {
    this.currentNodeGroupItemIndex_ = 0;
    this.currentNodeGroupItem_ =
        nodeGroup.nodes[this.currentNodeGroupItemIndex_];
    // Fixed: was `===`, which inverted the documented return value (true
    // when the index did NOT change). Return whether the index moved, to
    // match the contract and the |nodeUpdated| path below.
    return this.currentNodeGroupItemIndex_ !== optStartFromNodeGroupIndex;
  }
  // Sets the |this.currentNodeGroupItemIndex_| to
  // |optStartFromNodeGroupIndex|
  this.currentNodeGroupItemIndex_ = optStartFromNodeGroupIndex;
  this.currentNodeGroupItem_ =
      nodeGroup.nodes[this.currentNodeGroupItemIndex_];
  if (this.currentNodeGroupItemIndex_ + 1 < nodeGroup.nodes.length) {
    let next: ParagraphUtils.NodeGroupItem|null =
        nodeGroup.nodes[this.currentNodeGroupItemIndex_ + 1];
    let nodeUpdated = false;
    // TODO(katie): For something like a date, the start and end
    // node group nodes can actually be different. Example:
    // "<span>Tuesday,</span> December 18, 2018".
    // Check if we've reached this next node yet. Since charIndex is the
    // start char index of the target word, we just need to make sure the
    // next.startChar is bigger than it.
    while (next && charIndex >= next.startChar &&
           this.currentNodeGroupItemIndex_ + 1 < nodeGroup.nodes.length) {
      next = this.incrementCurrentNodeAndGetNext_(nodeGroup);
      nodeUpdated = true;
    }
    return nodeUpdated;
  }
  return false;
}
/**
 * Apply start or end offset to the text of the |nodeGroup|.
 * @param nodeGroup the input nodeGroup.
 * @param offset the size of offset.
 * @param isStartOffset whether to apply a startOffset or an
 *     endOffset.
 */
applyOffset(
    nodeGroup: ParagraphUtils.NodeGroup, offset: number,
    isStartOffset: boolean): void {
  // Note: uses String.prototype.slice rather than the deprecated substr;
  // for non-negative offsets the results are identical.
  if (isStartOffset) {
    // Applying start offset. Remove all text before the start index so that
    // it is not spoken. Backfill with spaces so that index counting
    // functions don't get confused.
    nodeGroup.text = ' '.repeat(offset) + nodeGroup.text.slice(offset);
  } else {
    // Remove all text after the end index so it is not spoken.
    nodeGroup.text = nodeGroup.text.slice(0, offset);
    nodeGroup.endOffset = offset;
  }
}
/**
 * Prepares for speech. Call once before this.ttsManager_.speak is called.
 * @param clearFocusRing Whether to clear the focus ring.
 */
private prepareForSpeech_(clearFocusRing: boolean): void {
  this.cancelIfSpeaking_(clearFocusRing);
  // Re-arm the periodic UI refresh so it tracks automation tree changes;
  // clear any previous timer first.
  if (this.intervalId_ !== undefined) {
    clearInterval(this.intervalId_);
  }
  this.intervalId_ = setInterval(
      () => this.updateUi_(),
      SelectToSpeakConstants.NODE_STATE_TEST_INTERVAL_MS);
}
/**
 * Handles TTS 'word' events: determines which node is currently being
 * spoken and prepares for word highlighting when it is enabled.
 * @param event The word event to process.
 * @param nodeGroup The node group for this utterance.
 */
private onTtsWordEvent_(
    event: chrome.tts.TtsEvent, nodeGroup: ParagraphUtils.NodeGroup): void {
  if (event.charIndex === undefined) {
    return;
  }
  // Not every speech engine includes a word length in the event.
  const lengthKnown = event.length !== undefined && event.length >= 0;
  const wordLength = event.length || 0;
  // Only move the index forward; TTS sometimes reports an incorrect
  // (smaller) number at the end of an utterance.
  this.currentCharIndex_ = Math.max(event.charIndex, this.currentCharIndex_);
  console.debug(`${nodeGroup.text} (index ${event.charIndex})`);
  const marker = lengthKnown ? '^'.repeat(wordLength) : '^';
  console.debug('-'.repeat(event.charIndex) + marker);
  // Sync this.currentNodeGroupItem_, this.currentNodeWord_, and
  // this.currentNodeGroupItemIndex_ to the node containing this word.
  const nodeUpdated = this.syncCurrentNodeWithCharIndex_(
      nodeGroup, event.charIndex, this.currentNodeGroupItemIndex_);
  if (nodeUpdated && !this.prefsManager_.wordHighlightingEnabled()) {
    // With per-word highlight, the UI is updated after the highlighted word
    // is known; otherwise refresh as soon as the node changes.
    this.updateUi_();
  }
  if (!this.prefsManager_.wordHighlightingEnabled()) {
    this.currentNodeWord_ = null;
    return;
  }
  // Word highlighting is on: compute the word bounds relative to the node.
  if (lengthKnown) {
    this.currentNodeWord_ = {
      'start': event.charIndex - this.currentNodeGroupItem_!.startChar,
      'end': event.charIndex + wordLength -
          this.currentNodeGroupItem_!.startChar,
    };
    this.updateUi_();
  } else {
    this.updateNodeHighlight_(nodeGroup.text, event.charIndex);
  }
}
/**
 * Advances the current node pointers to the next node in the group, then
 * returns the node after that (or null if none remains).
 */
private incrementCurrentNodeAndGetNext_(nodeGroup: ParagraphUtils.NodeGroup):
    ParagraphUtils.NodeGroupItem|null {
  // Step to the following node in the group.
  const newIndex = ++this.currentNodeGroupItemIndex_;
  this.currentNodeGroupItem_ = nodeGroup.nodes[newIndex];
  // Null signals that the current word must be recalculated later.
  this.currentNodeWord_ = null;
  const lookahead = newIndex + 1;
  return lookahead < nodeGroup.nodes.length ? nodeGroup.nodes[lookahead] :
                                              null;
}
/**
 * Transitions to a new STS state, notifying Chrome and clearing UI when
 * going inactive. No-op if the state is unchanged.
 */
private onStateChanged_(
    state: chrome.accessibilityPrivate.SelectToSpeakState): void {
  if (this.state_ === state) {
    return;
  }
  if (state === SelectToSpeakState.INACTIVE) {
    this.clearFocusRingAndNode_();
  }
  // Send state change event to Chrome.
  chrome.accessibilityPrivate.setSelectToSpeakState(state);
  this.state_ = state;
}
/**
* Cancels the current speech queue.
* @param clearFocusRing Whether to clear the focus ring as well.
*/
private cancelIfSpeaking_(clearFocusRing: boolean): void {
if (clearFocusRing) {
this.stopAll_();
} else {
// Just stop speech
this.ttsManager_.stop();
}
}
/**
 * Determines foreground status by hit-testing the node's top-left corner and
 * comparing containing windows (with special cases for the STS panel and for
 * trees, e.g. ARC++, where the hit test lands outside the content tree).
 * @return Promise that resolves to whether the given node
 *     should be considered in the foreground or not.
 */
private isNodeInForeground_(node: AutomationNode): Promise<boolean> {
  return new Promise(resolve => {
    this.desktop_!.hitTestWithReply(
        node.location.left, node.location.top, nodeAtLocation => {
          chrome.automation.getFocus(focusedNode => {
            const window =
                NodeUtils.getNearestContainingWindow(nodeAtLocation);
            const currentWindow = NodeUtils.getNearestContainingWindow(node);
            // The hit test landed in the node's own window: foreground.
            if (currentWindow != null && window != null &&
                currentWindow === window) {
              resolve(true);
              return;
            }
            if (UiManager.isPanel(window) ||
                UiManager.isPanel(
                    NodeUtils.getNearestContainingWindow(focusedNode))) {
              // If the focus is on the Select-to-speak panel or the hit test
              // landed on the panel, treat the current node as if it is in
              // the foreground.
              resolve(true);
              return;
            }
            if (focusedNode && currentWindow) {
              // See if the focused node window matches the currentWindow.
              // This may happen in some cases, for example, ARC++, when the
              // window which received the hit test request is not part of the
              // tree that contains the actual content. In such cases, use
              // focus to get the appropriate root.
              const focusedWindow =
                  NodeUtils.getNearestContainingWindow(focusedNode.root!);
              if (focusedWindow != null && currentWindow === focusedWindow) {
                resolve(true);
                return;
              }
            }
            // No window relationship established: treat as background.
            resolve(false);
          });
        });
  });
}
/**
 * @return Current node that is being spoken, or null if there is none.
 */
private getCurrentSpokenNode_(): AutomationNode|null {
  const item = this.currentNodeGroupItem_;
  if (!item) {
    return null;
  }
  if (item.hasInlineText) {
    if (this.currentNodeWord_) {
      // Resolve the inline text node containing the current word.
      return ParagraphUtils.findInlineTextNodeByCharacterIndex(
          item.node, this.currentNodeWord_.start);
    }
    if (this.shouldShowNavigationControls_()) {
      // Navigation controls are on but word highlighting is off
      // (currentNodeWord_ === null): still resolve the inline text node so
      // the focus ring highlights the whole block.
      return ParagraphUtils.findInlineTextNodeByCharacterIndex(item.node, 0);
    }
  }
  // No inline text, or word highlighting and navigation controls are both
  // disabled.
  return item.node;
}
/**
 * Refreshes the UI (focus ring, highlight, panel) from the current STS and
 * node state.
 * @return Promise that resolves when the update is complete.
 */
private async updateUi_(): Promise<void> {
  if (this.currentNodeGroupItem_ === null) {
    // Nothing to draw.
    return;
  }
  const node = this.currentNodeGroupItem_.node;
  // A node without a location cannot be hit-tested; assume background.
  const inForeground = node.location !== undefined ?
      await this.isNodeInForeground_(node) :
      false;
  // The await above may have raced with a node change; bail out if the
  // current item no longer refers to the same node.
  if (this.currentNodeGroupItem_ === null ||
      this.currentNodeGroupItem_.node !== node) {
    return;
  }
  const nodeState = NodeUtils.getNodeState(node);
  const unusable = nodeState === NodeUtils.NodeState.NODE_STATE_INVALID ||
      nodeState === NodeUtils.NodeState.NODE_STATE_INVISIBLE;
  if (unusable || !inForeground) {
    // Node is invalid/invisible or sits behind another window.
    this.uiManager_.clear();
    return;
  }
  const spokenNode = this.getCurrentSpokenNode_();
  const currentNodeGroup = this.getCurrentNodeGroup_();
  if (!currentNodeGroup || !spokenNode) {
    console.warn('Could not update UI; no node group or spoken node');
    return;
  }
  if (this.scrollToSpokenNode_ && spokenNode.state!['offscreen']) {
    spokenNode.makeVisible();
  }
  const currentWord = this.prefsManager_.wordHighlightingEnabled() ?
      this.currentNodeWord_ :
      null;
  this.uiManager_.update(currentNodeGroup, spokenNode, currentWord, {
    showPanel: this.shouldShowNavigationControls_(),
    paused: this.isPaused_(),
    speechRateMultiplier: this.speechRateMultiplier_,
  });
}
/**
* Shows a dialog to the user on first-run after enhanced voices update,
* showing privacy disclaimer and asking if the user wants to turn on enhanced
* network voices.
*
* @param callback Called back after user has confirmed or
* canceled in the dialog.
*/
private maybeShowEnhancedVoicesDialog_(callback: () => any): void {
if (!this.prefsManager_.enhancedVoicesDialogShown() &&
this.prefsManager_.enhancedNetworkVoicesAllowed()) {
// TODO(crbug.com/1230227): Style this dialog to match UX mocks.
const title =
chrome.i18n.getMessage('select_to_speak_natural_voice_dialog_title');
const description = chrome.i18n.getMessage(
'select_to_speak_natural_voice_dialog_description');
const cancelName =
chrome.i18n.getMessage('select_to_speak_natural_voice_dialog_cancel');
chrome.accessibilityPrivate.showConfirmationDialog(
title, description, cancelName, confirm => {
this.prefsManager_.setEnhancedNetworkVoicesFromDialog(confirm);
if (callback !== undefined) {
callback();
}
});
} else {
// Flag not set or already shown, so we can continue the control flow
// synchronously.
if (callback !== undefined) {
callback();
}
}
}
/**
 * Updates the currently highlighted node word based on the current text
 * and the character index of an event.
 * @param text The current text.
 * @param charIndex The index of a current event in the text.
 * @param optStartIndex The index at which to start the
 *     highlight. This takes precedence over the charIndex.
 */
private updateNodeHighlight_(
    text: string, charIndex: number, optStartIndex?: number): void {
  if (charIndex >= text.length) {
    // No need to do work if we are at the end of the paragraph.
    return;
  }
  // Get the next word based on the event's charIndex.
  const nextWordStart = WordUtils.getNextWordStart(
      text, charIndex, this.currentNodeGroupItem_!);
  // The |WordUtils.getNextWordEnd| will find the correct end based on the
  // trimmed text, so there is no need to provide additional input like
  // optStartIndex.
  const nextWordEnd = WordUtils.getNextWordEnd(
      text, optStartIndex === undefined ? nextWordStart : optStartIndex,
      this.currentNodeGroupItem_!);
  // Map the next word into the node's index from the text:
  // group-relative indices minus the node's startChar.
  const nodeStart = optStartIndex === undefined ?
      nextWordStart - this.currentNodeGroupItem_!.startChar :
      optStartIndex - this.currentNodeGroupItem_!.startChar;
  // Clamp the end so the highlight never runs past the node's own name.
  const nodeEnd = Math.min(
      nextWordEnd - this.currentNodeGroupItem_!.startChar,
      NodeUtils.nameLength(this.currentNodeGroupItem_!.node));
  if ((this.currentNodeWord_ == null ||
       nodeStart >= this.currentNodeWord_.end) &&
      nodeStart <= nodeEnd) {
    // Only update the bounds if they have increased from the
    // previous node. Because tts may send multiple callbacks
    // for the end of one word and the beginning of the next,
    // checking that the current word has changed allows us to
    // reduce extra work.
    this.currentNodeWord_ = {'start': nodeStart, 'end': nodeEnd};
    this.updateUi_();
  }
}
/**
 * @return Current speech rate: the default rate from prefs scaled by the
 *     user-selected multiplier, rounded to one decimal place
 *     (e.g. 1.799999 becomes 1.8).
 */
private getSpeechRate_(): number {
  const scaled = this.prefsManager_.speechRate() * this.speechRateMultiplier_;
  return Math.round(scaled * 10) / 10;
}
/**
 * @return Whether all given nodes support the navigation panel.
 */
private isNavigationPanelSupported_(nodes: AutomationNode[]): boolean {
  if (nodes.length === 0) {
    return true;
  }
  const first = nodes[0];
  if (nodes.length === 1 && first === first.root &&
      first.parent?.root?.role === RoleType.DESKTOP) {
    // The selection is a root window within the desktop, such as a browser
    // window: there is nowhere for the user to navigate to, and the panel
    // could be clipped offscreen if the window is fullscreened.
    return false;
  }
  // Do not show panel on system UI. System UI can be problematic due to
  // auto-dismissing behavior (see http://crbug.com/1157148), but also
  // navigation controls do not work well for control-rich interfaces that
  // are light on text (and therefore no sentence and paragraph structures).
  return nodes.every(n => !n.root || n.root.role !== RoleType.DESKTOP);
}
/**
 * Simulates a keyboard state change for testing.
 * @param keysPressed Which keys to pretend are currently pressed.
 */
protected sendMockSelectToSpeakKeysPressedChanged(keysPressed: number[]):
    void {
  const pressedSet = new Set(keysPressed);
  this.inputHandler_!.onKeysPressedChanged(pressedSet);
}
/**
 * Fires a mock mouse event of the given type for testing.
 * @param type The event type.
 * @param mouseX The mouse x coordinate in global screen coordinates.
 * @param mouseY The mouse y coordinate in global screen coordinates.
 */
protected fireMockMouseEvent(
    type: chrome.accessibilityPrivate.SyntheticMouseEventType, mouseX: number,
    mouseY: number): void {
  this.inputHandler_!.onMouseEvent(type, mouseX, mouseY);
}
/**
 * @return Whether voice switching is enabled in preferences.
 * TODO(crbug.com/950391): Consider adding a metric for when voice switching
 * gets used.
 */
private shouldUseVoiceSwitching_(): boolean {
  return this.prefsManager_.voiceSwitchingEnabled();
}
/**
 * Used by C++ tests to ensure STS load is completed.
 * @param callback Callback for when desktop is loaded from
 *     automation.
 */
setOnLoadDesktopCallbackForTest(callback: () => any): void {
  if (this.desktop_) {
    // Desktop already loaded; invoke immediately.
    callback();
    return;
  }
  // Defer until the desktop arrives from automation.
  this.onLoadDesktopCallbackForTest_ = callback;
}
}
// Expose getGSuiteAppRoot to the test harness.
TestImportManager.exportForTesting(getGSuiteAppRoot);