// chromium/components/language/ios/browser/ios_language_detection_tab_helper.mm

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/language/ios/browser/ios_language_detection_tab_helper.h"

#include "base/functional/bind.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/utf_string_conversions.h"
#include "base/timer/elapsed_timer.h"
#include "components/language/core/browser/url_language_histogram.h"
#include "components/language/ios/browser/language_detection_java_script_feature.h"
#include "components/language/ios/browser/string_clipping_util.h"
#include "components/prefs/pref_member.h"
#include "components/translate/core/browser/translate_pref_names.h"
#include "components/translate/core/common/language_detection_details.h"
#include "components/translate/core/common/translate_constants.h"
#include "components/translate/core/common/translate_util.h"
#include "components/translate/core/language_detection/language_detection_model.h"
#include "components/translate/core/language_detection/language_detection_util.h"
#import "ios/web/common/url_scheme_util.h"
#include "ios/web/public/js_messaging/web_frame.h"
#import "ios/web/public/navigation/navigation_context.h"
#include "net/http/http_response_headers.h"

namespace language {

// Character limit passed to GetStringByClippingLastWord() in OnTextRetrieved()
// before the page text is handed to language detection.
// Note: This should stay in sync with the constant in language_detection.js.
const size_t kMaxIndexChars = 65535;

namespace {
// Name for the UMA metric used to track language detection evaluation duration.
// It is recorded in DeterminePageLanguage() and only times the TFLite model
// call, not the ::translate::DeterminePageLanguage() fallback.
const char kTranslateLanguageDetectionTFLiteModelEvaluationDuration[] =
    "Translate.LanguageDetection.TFLiteModelEvaluationDuration";

// The old CLD model version. OnTextRetrieved() uses it as the default
// detection model version; DeterminePageLanguage() overwrites it only when the
// TFLite result is the one being returned.
const char kCLDModelVersion[] = "CLD3";

// Detection path taken by DeterminePageLanguage(), recorded to the
// "IOS.Translate.PageLoad.LanguageDetectionMethod" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class LanguageDetectionMethod {
  // The TFLite model ran and its result was returned to the caller.
  kTFLiteModelUsed = 0,
  // TFLite detection is enabled, but the model pointer is null or the model
  // reports that it is not available.
  kTFLiteModelUnavailable = 1,
  // TFLite detection is not enabled.
  kTFLiteModelDisabled = 2,
  // The TFLite model ran, but its result was only compared against, and then
  // discarded in favour of, the ::translate::DeterminePageLanguage() result.
  kTFLiteModelIgnored = 3,
  kMaxValue = kTFLiteModelIgnored
};

// Relationship between the TFLite and CLD3 results for one page, recorded by
// ComparePageLanguageDetection() to the
// "IOS.Translate.PageLoad.LanguageDetectionComparison" histogram. A result is
// treated as failed when it is empty or translate::kUnknownLanguageCode.
// NOTE(review): as these values feed a histogram they presumably must not be
// renumbered either -- confirm against the histogram's enum definition.
enum class LanguageDetectionComparison {
  // Only the TFLite model produced a language.
  kTFLiteModelOnly = 0,
  // Only the CLD3 model produced a language.
  kCLD3ModelOnly = 1,
  // Neither model produced a language.
  kBothModelFailed = 2,
  // Both models produced the same language.
  kBothModelAgree = 3,
  // Both models produced a language, but not the same one.
  kBothModelDisagree = 4,
  kMaxValue = kBothModelDisagree
};

void ComparePageLanguageDetection(const std::string& tflite_language,
                                  const std::string& cld3_language) {
  bool tflite_failed = tflite_language.empty() ||
                       tflite_language == translate::kUnknownLanguageCode;
  bool cld3_failed =
      cld3_language.empty() || cld3_language == translate::kUnknownLanguageCode;

  if (tflite_failed) {
    if (cld3_failed) {
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionComparison",
          LanguageDetectionComparison::kBothModelFailed);
    } else {
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionComparison",
          LanguageDetectionComparison::kCLD3ModelOnly);
    }
  } else {
    if (cld3_failed) {
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionComparison",
          LanguageDetectionComparison::kTFLiteModelOnly);
    } else if (cld3_language == tflite_language) {
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionComparison",
          LanguageDetectionComparison::kBothModelAgree);
    } else {
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionComparison",
          LanguageDetectionComparison::kBothModelDisagree);
    }
  }
}
}  // namespace

// Stores the collaborators, binds the translate-enabled pref, makes a first
// detection attempt and then registers this helper as an observer of both
// `web_state` and the language-detection feature's WebFramesManager.
// `web_state` must be non-null.
IOSLanguageDetectionTabHelper::IOSLanguageDetectionTabHelper(
    web::WebState* web_state,
    UrlLanguageHistogram* url_language_histogram,
    translate::LanguageDetectionModel* language_detection_model,
    PrefService* prefs)
    : web_state_(web_state),
      url_language_histogram_(url_language_histogram),
      language_detection_model_(language_detection_model),
      weak_method_factory_(this) {
  DCHECK(web_state_);

  translate_enabled_.Init(translate::prefs::kOfferTranslateEnabled, prefs);

  // Preloaded tabs never receive WebStateObserver::PageLoaded, so try to
  // detect the language right away instead of waiting for that callback.
  StartLanguageDetection();

  // Observe the WebState (navigation / load / destruction callbacks) and the
  // frames manager (so detection can be retried once a main frame exists).
  web_state_->AddObserver(this);
  LanguageDetectionJavaScriptFeature::GetInstance()
      ->GetWebFramesManager(web_state_)
      ->AddObserver(this);
}

// Notifies this helper's observers that it is going away and, if the WebState
// is still alive, undoes the observer registrations made in the constructor.
IOSLanguageDetectionTabHelper::~IOSLanguageDetectionTabHelper() {
  for (auto& observer : observer_list_) {
    observer.IOSLanguageDetectionTabHelperWasDestroyed(this);
  }
  if (web_state_) {
    // `web_state_` is only non-null here when WebStateDestroyed() has not run,
    // i.e. the WebState outlives this helper. The constructor registered with
    // the WebFramesManager as well as the WebState, so unregister from both;
    // otherwise the frames manager would keep a dangling pointer to `this`.
    LanguageDetectionJavaScriptFeature::GetInstance()
        ->GetWebFramesManager(web_state_)
        ->RemoveObserver(this);
    web_state_->RemoveObserver(this);
    web_state_ = nullptr;
  }
}

// Adds `observer` to this helper's observer list. Observers on that list are
// notified from OnLanguageDetermined() and from the destructor.
void IOSLanguageDetectionTabHelper::AddObserver(Observer* observer) {
  observer_list_.AddObserver(observer);
}

// Removes `observer` from this helper's observer list so that it no longer
// receives OnLanguageDetermined() or destruction notifications.
void IOSLanguageDetectionTabHelper::RemoveObserver(Observer* observer) {
  observer_list_.RemoveObserver(observer);
}

// Publishes a finished detection result: feeds the URL language histogram
// (when one was supplied and the model result is marked reliable) and then
// forwards `details` to every registered observer.
void IOSLanguageDetectionTabHelper::OnLanguageDetermined(
    const translate::LanguageDetectionDetails& details) {
  // Only reliable model detections count as a page visit in the histogram.
  const bool should_record_visit =
      url_language_histogram_ && details.is_model_reliable;
  if (should_record_visit) {
    url_language_histogram_->OnPageVisited(details.model_detected_language);
  }

  for (auto& listener : observer_list_) {
    listener.OnLanguageDetermined(details);
  }
}

// web::WebFramesManager::Observer implementation:

// Retries language detection once the main frame shows up, but only if an
// earlier StartLanguageDetection() call had to bail out because no main frame
// existed yet.
void IOSLanguageDetectionTabHelper::WebFrameBecameAvailable(
    web::WebFramesManager* web_frames_manager,
    web::WebFrame* web_frame) {
  if (!web_frame->IsMainFrame() || !waiting_for_main_frame_) {
    return;
  }

  // Clear the flag before retrying; StartLanguageDetection() sets it again if
  // it still cannot find a main frame.
  waiting_for_main_frame_ = false;
  StartLanguageDetection();
}

// web::WebStateObserver implementation:

// Starts language detection when a page load completes successfully; loads
// that finish with any other status are ignored.
void IOSLanguageDetectionTabHelper::PageLoaded(
    web::WebState* web_state,
    web::PageLoadCompletionStatus load_completion_status) {
  DCHECK_EQ(web_state_, web_state);

  const bool load_succeeded =
      load_completion_status == web::PageLoadCompletionStatus::SUCCESS;
  if (!load_succeeded) {
    return;
  }
  StartLanguageDetection();
}

// Clears the "waiting for main frame" flag when a navigation starts, so a
// detection retry requested earlier (see StartLanguageDetection()) is not
// triggered by WebFrameBecameAvailable() after this point. Both parameters are
// unused.
void IOSLanguageDetectionTabHelper::DidStartNavigation(
    web::WebState* web_state,
    web::NavigationContext* navigation_context) {
  waiting_for_main_frame_ = false;
}

// Handles the end of a navigation. A cross-document navigation only refreshes
// the cached Content-Language header from the response; a same-document
// navigation starts language detection immediately.
void IOSLanguageDetectionTabHelper::DidFinishNavigation(
    web::WebState* web_state,
    web::NavigationContext* navigation_context) {
  DCHECK_EQ(web_state_, web_state);

  if (!navigation_context->IsSameDocument()) {
    ExtractContentLanguageHeader(navigation_context->GetResponseHeaders());
    return;
  }

  StartLanguageDetection();
}

// Called when the observed WebState is being destroyed. Undoes both observer
// registrations made in the constructor and drops the WebState pointer so that
// later callbacks (e.g. OnTextRetrieved()) and the destructor see it as gone.
void IOSLanguageDetectionTabHelper::WebStateDestroyed(
    web::WebState* web_state) {
  DCHECK_EQ(web_state_, web_state);
  // The constructor also registered this helper with the feature's
  // WebFramesManager; unregister here so the manager is not left holding a
  // pointer to an observer that no longer tracks a WebState.
  LanguageDetectionJavaScriptFeature::GetInstance()
      ->GetWebFramesManager(web_state_)
      ->RemoveObserver(this);
  web_state_->RemoveObserver(this);
  web_state_ = nullptr;
}

void IOSLanguageDetectionTabHelper::StartLanguageDetection() {
  // Translate setting should not cancel language detection, except if it is
  // disabled by policy.
  if (!translate_enabled_.GetValue() && translate_enabled_.IsManaged()) {
    return;
  }
  DCHECK(web_state_);
  const GURL& url = web_state_->GetVisibleURL();
  if (!web::UrlHasWebScheme(url) || !web_state_->ContentIsHTML())
    return;

  web::WebFramesManager* web_frames_manager =
      LanguageDetectionJavaScriptFeature::GetInstance()->GetWebFramesManager(
          web_state_);
  web::WebFrame* web_frame = web_frames_manager->GetMainWebFrame();
  if (!web_frame) {
    waiting_for_main_frame_ = true;
    return;
  }

  LanguageDetectionJavaScriptFeature::GetInstance()->StartLanguageDetection(
      web_frame);
}

// Select the correct DeterminePageLanguage to call based on the feature flags.
std::string IOSLanguageDetectionTabHelper::DeterminePageLanguage(
    const std::string& code,
    const std::string& html_lang,
    const std::u16string& contents,
    std::string* model_detected_language,
    bool* is_model_reliable,
    float& model_reliability_score,
    std::string* detection_model_version) {
  if (translate::IsTFLiteLanguageDetectionEnabled() &&
      language_detection_model_ && language_detection_model_->IsAvailable()) {
    base::ElapsedTimer timer;
    std::string tflite_language =
        language_detection_model_->DeterminePageLanguage(
            code, html_lang, contents, model_detected_language,
            is_model_reliable, model_reliability_score);
    base::UmaHistogramTimes(
        kTranslateLanguageDetectionTFLiteModelEvaluationDuration,
        timer.Elapsed());

    if (!translate::IsTFLiteLanguageDetectionIgnoreEnabled()) {
      *detection_model_version = language_detection_model_->GetModelVersion();
      base::UmaHistogramEnumeration(
          "IOS.Translate.PageLoad.LanguageDetectionMethod",
          LanguageDetectionMethod::kTFLiteModelUsed);
      return tflite_language;
    }

    base::UmaHistogramEnumeration(
        "IOS.Translate.PageLoad.LanguageDetectionMethod",
        LanguageDetectionMethod::kTFLiteModelIgnored);
    std::string cld3_language = ::translate::DeterminePageLanguage(
        code, html_lang, contents, model_detected_language, is_model_reliable,
        model_reliability_score);

    ComparePageLanguageDetection(tflite_language, cld3_language);
    return cld3_language;
  }

  if (translate::IsTFLiteLanguageDetectionEnabled()) {
    base::UmaHistogramEnumeration(
        "IOS.Translate.PageLoad.LanguageDetectionMethod",
        LanguageDetectionMethod::kTFLiteModelUnavailable);
  } else {
    base::UmaHistogramEnumeration(
        "IOS.Translate.PageLoad.LanguageDetectionMethod",
        LanguageDetectionMethod::kTFLiteModelDisabled);
  }
  return ::translate::DeterminePageLanguage(
      code, html_lang, contents, model_detected_language, is_model_reliable,
      model_reliability_score);
}

// Callback invoked with the data extracted from the page. Determines the page
// language from the extracted text and the declared languages, then reports
// the result through OnLanguageDetermined(). Nothing is reported when the
// WebState is gone or when no language could be determined.
//
// `has_notranslate`: copied verbatim into the reported details.
// `js_http_content_language`: content language found by the page script; when
//     empty, the cached Content-Language response header is used instead.
// `html_lang`: language declared on the HTML root element.
// `url`: URL the text was extracted from.
// `text_content`: page text; treated as empty unless it is a string value.
void IOSLanguageDetectionTabHelper::OnTextRetrieved(
    const bool has_notranslate,
    const std::string& js_http_content_language,
    const std::string& html_lang,
    const GURL& url,
    const base::Value* text_content) {
  if (!web_state_ || web_state_->IsBeingDestroyed()) {
    // If the webState is destroyed, this callback will still be called as the
    // request is cancelled.
    return;
  }
  // If there is no language defined in httpEquiv, use the HTTP header.
  const std::string http_content_language = js_http_content_language.empty()
                                                ? content_language_header_
                                                : js_http_content_language;

  std::string model_detected_language;
  // Initialized so that `details.is_model_reliable` below never reads an
  // indeterminate value, even if a detection backend leaves it unset.
  bool is_model_reliable = false;
  float model_reliability_score = 0.0f;
  std::u16string text = text_content && text_content->is_string()
                            ? base::UTF8ToUTF16(text_content->GetString())
                            : std::u16string();

  // Defaults to the CLD version; DeterminePageLanguage() replaces it only when
  // the TFLite result is the one being returned.
  std::string detection_model_version = kCLDModelVersion;

  std::string language =
      DeterminePageLanguage(http_content_language, html_lang,
                            GetStringByClippingLastWord(text, kMaxIndexChars),
                            &model_detected_language, &is_model_reliable,
                            model_reliability_score, &detection_model_version);

  if (language.empty())
    return;  // No language detected.

  // The page text itself is deliberately not stored in `details`: this avoids
  // an unnecessary copy of the full text content (which can be ~64kB) until
  // we need it on iOS (e.g. for the translate internals page).
  translate::LanguageDetectionDetails details;
  details.time = base::Time::Now();
  details.url = url;
  details.has_notranslate = has_notranslate;
  details.content_language = http_content_language;
  details.model_detected_language = model_detected_language;
  details.is_model_reliable = is_model_reliable;
  details.html_root_language = html_lang;
  details.adopted_language = language;
  details.detection_model_version = detection_model_version;

  OnLanguageDetermined(details);
}

// Returns a weak pointer to this helper, vended by `weak_method_factory_`.
base::WeakPtr<IOSLanguageDetectionTabHelper>
IOSLanguageDetectionTabHelper::GetWeakPtr() {
  return weak_method_factory_.GetWeakPtr();
}

// Refreshes the cached Content-Language value from `headers`. With no headers
// the cache is cleared. When the header holds a comma-separated list, only the
// part before the first comma is kept.
void IOSLanguageDetectionTabHelper::ExtractContentLanguageHeader(
    net::HttpResponseHeaders* headers) {
  if (headers == nullptr) {
    content_language_header_.clear();
    return;
  }

  headers->GetNormalizedHeader("content-language", &content_language_header_);

  // Drop the first comma and everything that follows it, if there is one.
  const size_t first_comma = content_language_header_.find(',');
  if (first_comma == std::string::npos) {
    return;
  }
  content_language_header_.erase(first_comma);
}

// Macro defined outside this file; presumably emits the user-data key
// definition that lets this helper be attached to a web::WebState -- confirm
// against the macro's declaration.
WEB_STATE_USER_DATA_KEY_IMPL(IOSLanguageDetectionTabHelper)

}  // namespace language