chromium/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.chrome.browser.contextualsearch;

import android.text.TextUtils;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.annotation.VisibleForTesting;

import org.jni_zero.CalledByNative;
import org.jni_zero.NativeClassQualifiedName;
import org.jni_zero.NativeMethods;

/**
 * Provides a context in which to search, and links to the native ContextualSearchContext.
 * Includes the selection, selection offsets, surrounding page content, etc.
 * Requires an override of #onSelectionChanged to call when a non-empty selection is established
 * or changed.
 */
public abstract class ContextualSearchContext {
    static final int INVALID_OFFSET = -1;

    // Non-visible word-break marker.  Never treated as a word break by #isWordBreakAtIndex.
    private static final int SOFT_HYPHEN_CHAR = '\u00AD';

    // Pointer to the native instance of this class, or 0 once #destroy has been called.
    private long mNativePointer;

    // Whether this context has had the required properties set so it can Resolve a Search Term.
    private boolean mHasSetResolveProperties;

    // A shortened version of the actual text content surrounding the selection, or null if not yet
    // established.
    private String mSurroundingText;

    // The start and end offsets of the selection within the text content.
    private int mSelectionStartOffset = INVALID_OFFSET;
    private int mSelectionEndOffset = INVALID_OFFSET;

    // The detected language of the context, or {@code null} if not yet detected, and empty if
    // it cannot be reliably determined.
    private String mDetectedLanguage;

    // The offset of an initial Tap gesture within the text content.
    private int mTapOffset = INVALID_OFFSET;

    // The selection being resolved, or null if no resolve has started.
    private String mSelectionBeingResolved;

    // The original encoding of the base page.
    private String mEncoding;

    // The word that was tapped, as analyzed internally before selection takes place,
    // or {@code null} if no analysis has been done yet.
    private String mWordTapped;

    // The offset of the tap within the tapped word, or {@code INVALID_OFFSET} if not yet analyzed.
    private int mTapWithinWordOffset = INVALID_OFFSET;

    // Translation members.
    private @NonNull String mTargetLanguage = "";
    private @NonNull String mFluentLanguages = "";

    // The Related Searches stamp - non-empty when Related Searches are being requested.
    private String mRelatedSearchesStamp;

    /** Constructs a context that tracks the selection and some amount of page content. */
    ContextualSearchContext() {
        mNativePointer = ContextualSearchContextJni.get().init(this);
        mHasSetResolveProperties = false;
    }

    /**
     * Updates a context to be able to resolve a search term and have a large amount of
     * page content.
     * @param homeCountry The country where the user usually resides, or an empty string if not
     *        known.
     * @param doSendBasePageUrl Whether the base-page URL should be sent to the server.
     * @param targetLanguage The language to translate into, in case translation might be needed.
     * @param fluentLanguages An ordered comma-separated list of ISO 639 language codes that
     *        the user can read fluently, or an empty string.
     */
    void setResolveProperties(
            @NonNull String homeCountry,
            boolean doSendBasePageUrl,
            @NonNull String targetLanguage,
            @NonNull String fluentLanguages) {
        // TODO(donnd): consider making this a constructor variation.
        mHasSetResolveProperties = true;
        ContextualSearchContextJni.get()
                .setResolveProperties(getNativePointer(), this, homeCountry, doSendBasePageUrl);
        // The languages are only stored here; they are pushed to native the next time
        // #setSurroundingText runs.
        mTargetLanguage = targetLanguage;
        mFluentLanguages = fluentLanguages;
    }

    /**
     * This method should be called to clean up storage when an instance of this class is
     * no longer in use.  The ContextualSearchContextJni.get().destroy will call the destructor on
     * the native instance.
     */
    void destroy() {
        assert mNativePointer != 0;
        ContextualSearchContextJni.get().destroy(mNativePointer, this);
        mNativePointer = 0;

        // Also zero out private data that may be sizable.
        mSurroundingText = null;
    }

    /**
     * Sets the surrounding text and selection offsets, using "UTF-8" as the encoding.
     * @param surroundingText The text from the base page surrounding the selection.
     * @param startOffset The offset of the start of the selection.
     * @param endOffset The offset of the end of the selection.
     */
    @VisibleForTesting
    void setSurroundingText(String surroundingText, int startOffset, int endOffset) {
        setSurroundingText("UTF-8", surroundingText, startOffset, endOffset);
    }

    /**
     * Sets the surrounding text and selection offsets.  When the offsets describe an insertion
     * point (start equals end) and no Tap has been analyzed yet, the tapped word is analyzed.
     * When the selection is non-empty {@link #onSelectionChanged} is called.  In all cases the
     * translation languages are then pushed to the native context.
     * @param encoding The original encoding of the base page.
     * @param surroundingText The text from the base page surrounding the selection.
     * @param startOffset The offset of the start of the selection.
     * @param endOffset The offset of the end of the selection.
     */
    @VisibleForTesting
    void setSurroundingText(
            String encoding, String surroundingText, int startOffset, int endOffset) {
        assert startOffset <= endOffset;
        mEncoding = encoding;
        mSurroundingText = surroundingText;
        mSelectionStartOffset = startOffset;
        mSelectionEndOffset = endOffset;

        // An empty selection is an insertion point, which is what a Tap gesture establishes.
        boolean isInsertionPoint = startOffset == endOffset;
        if (isInsertionPoint && startOffset <= surroundingText.length() && !hasAnalyzedTap()) {
            analyzeTap(startOffset);
        }
        // Notify of a changed selection if it's not empty.
        if (endOffset > startOffset) {
            onSelectionChanged();
        }
        // Detect the language of the surroundings or the selection.
        setTranslationLanguages(getDetectedLanguage(), mTargetLanguage, mFluentLanguages);
    }

    /**
     * @return The text that surrounds the selection, or {@code null} if none yet known.
     */
    @Nullable
    String getSurroundingText() {
        return mSurroundingText;
    }

    /**
     * @return The offset into the surrounding text of the start of the selection, or
     *         {@link #INVALID_OFFSET} if not yet established.
     */
    int getSelectionStartOffset() {
        return mSelectionStartOffset;
    }

    /**
     * @return The offset into the surrounding text of the end of the selection, or
     *         {@link #INVALID_OFFSET} if not yet established.
     */
    int getSelectionEndOffset() {
        return mSelectionEndOffset;
    }

    /**
     * @return The original encoding of the base page.
     */
    String getEncoding() {
        return mEncoding;
    }

    /**
     * @return The selection being resolved, or {@code null} if no resolve has been
     * requested.
     */
    @Nullable
    String getSelectionBeingResolved() {
        return mSelectionBeingResolved;
    }

    /**
     * @return The text content that follows the selection (one side of the surrounding text), or
     *         an empty string if there is no surrounding text or the selection end offset is not
     *         a positive offset within it.
     */
    String getTextContentFollowingSelection() {
        if (mSurroundingText == null) return "";

        boolean isEndOffsetUsable =
                mSelectionEndOffset > 0 && mSelectionEndOffset <= mSurroundingText.length();
        return isEndOffsetUsable ? mSurroundingText.substring(mSelectionEndOffset) : "";
    }

    /**
     * @return Whether this context can Resolve the Search Term.
     */
    boolean canResolve() {
        return mHasSetResolveProperties && hasValidSelection();
    }

    /**
     * Prepares the Context to be used in a Resolve request by supplying last minute parameters.
     * If this call is not made before a Resolve then defaults are used (not exact and not a
     * Related Search).
     * @param isExactSearch Specifies whether this search must be exact -- meaning the resolve must
     *        return a non-expanding result that matches the selection exactly.
     * @param relatedSearchesStamp Information to be attached to the Resolve request that is needed
     *        for Related Searches. If this string is empty then no Related Searches results will
     *        be requested.
     */
    void prepareToResolve(boolean isExactSearch, String relatedSearchesStamp) {
        // Snapshot the selection now so later adjustments don't change what is being resolved.
        mSelectionBeingResolved = getCurrentSelection();
        mRelatedSearchesStamp = relatedSearchesStamp;
        ContextualSearchContextJni.get()
                .prepareToResolve(mNativePointer, this, isExactSearch, relatedSearchesStamp);
    }

    /**
     * Notifies of an adjustment that has been applied to the start and end of the selection.
     * @param startAdjust A signed value indicating the direction of the adjustment to the start of
     *        the selection (typically a negative value when the selection expands).
     * @param endAdjust A signed value indicating the direction of the adjustment to the end of
     *        the selection (typically a positive value when the selection expands).
     */
    void onSelectionAdjusted(int startAdjust, int endAdjust) {
        // Fully track the selection as it changes.
        mSelectionStartOffset += startAdjust;
        mSelectionEndOffset += endAdjust;
        ContextualSearchContextJni.get()
                .adjustSelection(getNativePointer(), this, startAdjust, endAdjust);
        // Notify of changes.
        onSelectionChanged();
    }

    /**
     * Single source of truth for extracting the selection from the surrounding text.
     * @return The current selection, or an empty string if the surrounding text is missing or
     *         empty, or if the selection offsets do not describe a range within it.
     */
    private String getCurrentSelection() {
        if (TextUtils.isEmpty(mSurroundingText)) return "";

        boolean isRangeWithinText =
                mSelectionStartOffset >= 0
                        && mSelectionStartOffset <= mSelectionEndOffset
                        && mSelectionEndOffset <= mSurroundingText.length();
        if (!isRangeWithinText) return "";

        return mSurroundingText.substring(mSelectionStartOffset, mSelectionEndOffset);
    }

    /** @return the current selection, or an empty string if data is invalid or nothing selected. */
    String getSelection() {
        return getCurrentSelection();
    }

    /** Notifies this instance that the selection has been changed. */
    abstract void onSelectionChanged();

    /**
     * Gets the language of the current context's content by calling the native CLD3 detector if
     * needed.  The result of the first detection is cached and returned by later calls.
     * @return An ISO 639 language code string, or an empty string if the language cannot be
     *         reliably determined.
     */
    @NonNull
    String getDetectedLanguage() {
        assert mSurroundingText != null;
        if (mDetectedLanguage == null) {
            mDetectedLanguage =
                    ContextualSearchContextJni.get().detectLanguage(mNativePointer, this);
        }
        return mDetectedLanguage;
    }

    /**
     * Pushes the given languages down to the native ContextualSearchContext.
     * @param detectedLanguage An ISO 639 language code string for the language to translate from.
     * @param targetLanguage An ISO 639 language code string to translate into.
     * @param fluentLanguages An ordered comma-separated list of ISO 639 language codes that
     *        the user can read fluently, or an empty string.
     */
    @VisibleForTesting
    void setTranslationLanguages(
            @NonNull String detectedLanguage,
            @NonNull String targetLanguage,
            @NonNull String fluentLanguages) {
        // Set redundant languages to empty strings: a fluent-languages list that is exactly the
        // target language adds no information.
        String effectiveFluentLanguages =
                targetLanguage.equals(fluentLanguages) ? "" : fluentLanguages;
        // The target language is essential in order to provide results the user can read, and if
        // not specified the server may fallback onto a guess based on location, which isn't
        // always a good experience.
        ContextualSearchContextJni.get()
                .setTranslationLanguages(
                        mNativePointer,
                        this,
                        detectedLanguage,
                        targetLanguage,
                        effectiveFluentLanguages);
    }

    // ============================================================================================
    // Content Analysis.
    // ============================================================================================

    /**
     * @return Whether this context has valid Surrounding text and initial Tap offset.
     */
    @VisibleForTesting
    boolean hasValidTappedText() {
        return !TextUtils.isEmpty(mSurroundingText)
                && mTapOffset >= 0
                && mTapOffset <= mSurroundingText.length();
    }

    /**
     * @return Whether this context has non-empty surrounding text and a non-empty selection
     *         (start strictly before end) whose end offset is strictly less than the length of
     *         the surrounding text.  An insertion point (start equal to end) is not considered
     *         a valid selection here.
     */
    @VisibleForTesting
    boolean hasValidSelection() {
        // NOTE(review): #getSelection accepts an end offset equal to the text length, whereas this
        // check rejects it, so a selection that reaches the very end of the surrounding text
        // cannot be resolved.  Confirm whether the strict bound is intentional.
        return !TextUtils.isEmpty(mSurroundingText)
                && mSelectionStartOffset != INVALID_OFFSET
                && mSelectionEndOffset != INVALID_OFFSET
                && mSelectionStartOffset < mSelectionEndOffset
                && mSelectionEndOffset < mSurroundingText.length();
    }

    /**
     * @return Whether a Tap gesture has occurred and been analyzed.
     */
    @VisibleForTesting
    boolean hasAnalyzedTap() {
        return mTapOffset >= 0;
    }

    /**
     * @return The word tapped, or {@code null} if the word that was tapped cannot be identified by
     *         the current limited parsing capability.
     * @see #analyzeTap(int)
     */
    String getWordTapped() {
        return mWordTapped;
    }

    /**
     * @return The offset of the tap within the tapped word, or {@code INVALID_OFFSET} if the tapped
     *         word cannot be identified by the current parsing capability.
     * @see #analyzeTap(int)
     */
    int getTapOffsetWithinTappedWord() {
        return mTapWithinWordOffset;
    }

    /**
     * Finds the words around the initial Tap offset by expanding and looking for word-breaks.
     * This mimics the Blink word-segmentation invoked by SelectWordAroundCaret and similar
     * selection logic, but is only appropriate for limited use.  Does not work on ideographic
     * languages and possibly many other cases.  Should be used only for ML signal evaluation.
     * If a word-break cannot be found on both sides of the Tap then the tapped word and the
     * offset within it are left unset ({@code null} and {@code INVALID_OFFSET}).
     * @param tapOffset The offset of the Tap within the surrounding text.
     */
    private void analyzeTap(int tapOffset) {
        mTapOffset = tapOffset;
        mWordTapped = null;
        mTapWithinWordOffset = INVALID_OFFSET;

        assert hasValidTappedText();

        int wordStartOffset = findWordStartOffset(mTapOffset);
        int wordEndOffset = findWordEndOffset(mTapOffset);
        if (wordStartOffset == INVALID_OFFSET || wordEndOffset == INVALID_OFFSET) return;

        mWordTapped = mSurroundingText.substring(wordStartOffset, wordEndOffset);
        mTapWithinWordOffset = mTapOffset - wordStartOffset;
    }

    /**
     * Scans backward, starting with the character just before the given offset, for the nearest
     * word-break.
     * @param initial The initial offset to scan from.
     * @return The start of the word that contains the given initial offset, within the surrounding
     *         text, or {@code INVALID_OFFSET} if no word-break precedes the offset (which includes
     *         a word that begins at the very start of the surrounding text).
     */
    private int findWordStartOffset(int initial) {
        for (int offset = initial - 1; offset >= 0; offset--) {
            if (isWordBreakAtIndex(offset)) {
                // The start of the word is after this word break.
                return offset + 1;
            }
        }

        return INVALID_OFFSET;
    }

    /**
     * Finds the offset of the end of the word that includes the given initial offset.
     * NOTE: this is the index of the character just past the last character of the word,
     * so a 3 character word "who" has start index 0 and end index 3.
     * The character at the initial offset is examined and each one after that too until a non-word
     * character is encountered, and that offset will be returned.
     * @param initial The initial offset to scan from.
     * @return The end of the word that contains the given initial offset, within the surrounding
     *         text, or {@code INVALID_OFFSET} if no word-break is found at or after the offset
     *         (which includes a word that runs to the very end of the surrounding text).
     */
    private int findWordEndOffset(int initial) {
        int textLength = mSurroundingText.length();
        for (int offset = initial; offset < textLength; offset++) {
            if (isWordBreakAtIndex(offset)) {
                // The end of the word is the offset of this word break.
                return offset;
            }
        }
        return INVALID_OFFSET;
    }

    /**
     * @return Whether the character at the given index is a word-break: anything that is neither a
     *         letter or digit nor a soft hyphen.
     */
    private boolean isWordBreakAtIndex(int index) {
        // NOTE(review): this examines a single UTF-16 code unit, so each half of a surrogate pair
        // is classified on its own rather than as one code point.
        char character = mSurroundingText.charAt(index);
        return !Character.isLetterOrDigit(character) && character != SOFT_HYPHEN_CHAR;
    }

    // ============================================================================================
    // Test support.
    // ============================================================================================

    /**
     * @return The Related Searches stamp most recently passed to {@link #prepareToResolve}, or
     *         {@code null} if that has not been called.
     */
    @VisibleForTesting
    String getRelatedSearchesStamp() {
        return mRelatedSearchesStamp;
    }

    // ============================================================================================
    // Native callback support.
    // ============================================================================================

    /**
     * @return The pointer to the native instance.  Asserts that the instance has not been
     *         destroyed.
     */
    @CalledByNative
    private long getNativePointer() {
        assert mNativePointer != 0;
        return mNativePointer;
    }

    @NativeMethods
    interface Natives {
        @NativeClassQualifiedName("NativeContextualSearchContext")
        long init(ContextualSearchContext caller);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        void destroy(long nativeContextualSearchContext, ContextualSearchContext caller);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        void setResolveProperties(
                long nativeContextualSearchContext,
                ContextualSearchContext caller,
                String homeCountry,
                boolean doSendBasePageUrl);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        void adjustSelection(
                long nativeContextualSearchContext,
                ContextualSearchContext caller,
                int startAdjust,
                int endAdjust);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        String detectLanguage(long nativeContextualSearchContext, ContextualSearchContext caller);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        void setTranslationLanguages(
                long nativeContextualSearchContext,
                ContextualSearchContext caller,
                String detectedLanguage,
                String targetLanguage,
                String fluentLanguages);

        @NativeClassQualifiedName("NativeContextualSearchContext")
        void prepareToResolve(
                long nativeContextualSearchContext,
                ContextualSearchContext caller,
                boolean isExactSearch,
                String relatedSearchesStamp);
    }
}