chromium/components/ukm/ios/ukm_url_recorder.mm

// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/ukm/ios/ukm_url_recorder.h"

#include <set>
#include <utility>

#include "base/containers/contains.h"
#include "base/containers/flat_map.h"
#include "ios/web/public/navigation/navigation_context.h"
#include "ios/web/public/web_state.h"
#include "ios/web/public/web_state_observer.h"
#include "ios/web/public/web_state_user_data.h"
#include "services/metrics/public/cpp/delegating_ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "url/gurl.h"

namespace ukm {

namespace internal {

// Constructs the observer and registers it with |web_state|.
//
// Until a navigation is seen to commit in DidFinishNavigation(), the last
// committed source id reported by this object is kInvalidSourceId.
SourceUrlRecorderWebStateObserver::SourceUrlRecorderWebStateObserver(
    web::WebState* web_state)
    : last_committed_source_id_(kInvalidSourceId) {
  // Paired with the RemoveObserver() call in WebStateDestroyed().
  web_state->AddObserver(this);
}

// Nothing to tear down explicitly here: observer unregistration is performed
// in WebStateDestroyed().
SourceUrlRecorderWebStateObserver::~SourceUrlRecorderWebStateObserver() =
    default;

// WebStateObserver callback invoked when |web_state| is being destroyed.
// Unregisters this object from |web_state|, undoing the AddObserver() call
// made in the constructor.
void SourceUrlRecorderWebStateObserver::WebStateDestroyed(
    web::WebState* web_state) {
  web_state->RemoveObserver(this);
}

// WebStateObserver callback for the start of a navigation. Remembers the URL
// the navigation started with, keyed by navigation id, so that
// DidFinishNavigation() can decide later whether it should be reported to UKM.
void SourceUrlRecorderWebStateObserver::DidStartNavigation(
    web::WebState* web_state,
    web::NavigationContext* navigation_context) {
  // A NavigationContext is only ever created for a main frame navigation, so
  // no filtering of non-main frame navigations is required in this callback.

  // Same-document navigations (URL fragment changes, history.pushState and
  // the like) are, at least for now, not tracked in UKM, so nothing is stored
  // for them.
  if (!navigation_context->IsSameDocument()) {
    // UKM must not record URLs for downloads, but whether a navigation turns
    // into a download is not known yet when it starts. The initial URL is
    // therefore only parked here; it is reported once the navigation has
    // finished and has been verified not to be a download. An entry already
    // stored under the same navigation id is left untouched.
    pending_navigations_.emplace(navigation_context->GetNavigationId(),
                                 navigation_context->GetUrl());
  }
}

// WebStateObserver callback for the end of a navigation. Consumes the entry
// stored by DidStartNavigation(), updates the last committed source id when
// the navigation committed, and forwards non-download navigations to
// MaybeRecordUrl().
void SourceUrlRecorderWebStateObserver::DidFinishNavigation(
    web::WebState* web_state,
    web::NavigationContext* navigation_context) {
  // A NavigationContext is only ever created for a main frame navigation, so
  // no filtering of non-main frame navigations is required in this callback.

  const int64_t navigation_id = navigation_context->GetNavigationId();
  auto pending_entry = pending_navigations_.find(navigation_id);
  if (pending_entry == pending_navigations_.end()) {
    // Nothing was stored when this navigation started (for instance because
    // it was a same-document navigation), so there is nothing to report.
    return;
  }

  // DidStartNavigation() never stores same-document navigations.
  DCHECK(!navigation_context->IsSameDocument());

  if (navigation_context->HasCommitted()) {
    last_committed_source_id_ =
        ConvertToSourceId(navigation_id, SourceIdType::NAVIGATION_ID);
  }

  // Take ownership of the stored URL before dropping the pending entry.
  GURL initial_url = std::move(pending_entry->second);
  pending_navigations_.erase(pending_entry);

  // URLs of navigations that ended up as downloads must not reach UKM.
  if (!navigation_context->IsDownload()) {
    MaybeRecordUrl(navigation_context, initial_url);
  }
}

// Returns the source id stored by DidFinishNavigation() for the most recent
// navigation that reported HasCommitted(), or kInvalidSourceId if no such
// navigation has been observed since construction.
SourceId SourceUrlRecorderWebStateObserver::GetLastCommittedSourceId() const {
  return last_committed_source_id_;
}

// Reports the navigation described by |navigation_context| to the UKM
// recorder, unless no recorder is available or the same navigation id has
// already been reported by this object.
//
// |initial_url| is the URL the navigation started with; it is recorded ahead
// of the navigation's current URL when the two differ.
void SourceUrlRecorderWebStateObserver::MaybeRecordUrl(
    web::NavigationContext* navigation_context,
    const GURL& initial_url) {
  DCHECK(!navigation_context->IsSameDocument());
  const int64_t nav_id = navigation_context->GetNavigationId();

  DelegatingUkmRecorder* const recorder = DelegatingUkmRecorder::Get();
  if (recorder == nullptr) {
    return;
  }

  // Guard against recording the same navigation more than once in the UKM
  // recorder. On iOS, the logic that infers navigations from the limited
  // signals that WebKit provides is imperfect, since not all of the usual
  // WebKit navigation callbacks are delivered for JS-initiated same-document
  // navigations. For example, suppose the sequence of visits is
  //   1. https://example.com#foo
  //   2. https://example.com#bar
  //   3. https://example.com#foo
  // The first and third visits to the same URL are erroneously considered the
  // same navigation, so the NavigationContext from the first is reused. UKM,
  // in contrast, considers these to be three different navigations, in line
  // with the logic on other platforms. As a workaround, the last visit to
  // #foo is not recorded to UKM. Cases such as these are observed to be very
  // rare.
  if (base::Contains(navigation_ids_seen_, nav_id)) {
    return;
  }

  const GURL& destination_url = navigation_context->GetUrl();

  // TODO(crbug.com/40587196): This check isn't quite correct, as self
  // redirecting is possible. This may also be changed to include the entire
  // redirect chain.
  UkmSource::NavigationData navigation_data;
  if (destination_url != initial_url) {
    navigation_data.urls.push_back(initial_url);
  }
  navigation_data.urls.push_back(destination_url);

  // TODO(crbug.com/41407501): Fill out the other fields in NavigationData.

  // Remember the id so that a later call for the same navigation is skipped.
  navigation_ids_seen_.insert(nav_id);
  const SourceId source_id =
      ConvertToSourceId(nav_id, SourceIdType::NAVIGATION_ID);
  recorder->RecordNavigation(source_id, navigation_data);
}

// NOTE(review): presumably emits the user-data key definition that the
// CreateForWebState()/FromWebState() calls below rely on (see
// ios/web/public/web_state_user_data.h) -- confirm against the macro.
WEB_STATE_USER_DATA_KEY_IMPL(SourceUrlRecorderWebStateObserver)

}  // namespace internal

// Sets up UKM source URL recording for |web_state| by asking
// SourceUrlRecorderWebStateObserver to create its instance for that WebState.
void InitializeSourceUrlRecorderForWebState(web::WebState* web_state) {
  internal::SourceUrlRecorderWebStateObserver::CreateForWebState(web_state);
}

// Looks up the SourceUrlRecorderWebStateObserver associated with |web_state|
// via FromWebState(). GetSourceIdForWebStateDocument() treats a null result
// as "no recorder available".
internal::SourceUrlRecorderWebStateObserver*
GetSourceUrlRecorderForWebStateForWebState(web::WebState* web_state) {
  return internal::SourceUrlRecorderWebStateObserver::FromWebState(web_state);
}

// Returns the UKM source id of the last navigation that committed in
// |web_state|, as tracked by its SourceUrlRecorderWebStateObserver. Yields
// kInvalidSourceId when no recorder is found for |web_state|.
SourceId GetSourceIdForWebStateDocument(web::WebState* web_state) {
  internal::SourceUrlRecorderWebStateObserver* const recorder =
      GetSourceUrlRecorderForWebStateForWebState(web_state);
  if (recorder == nullptr) {
    return kInvalidSourceId;
  }
  return recorder->GetLastCommittedSourceId();
}

}  // namespace ukm