// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "chrome/browser/offline_pages/background_loader_offliner.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "base/functional/bind.h"
#include "base/json/json_writer.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/system/sys_info.h"
#include "base/task/single_thread_task_runner.h"
#include "base/time/time.h"
#include "chrome/browser/content_settings/page_specific_content_settings_delegate.h"
#include "chrome/browser/offline_pages/offline_page_mhtml_archiver.h"
#include "chrome/browser/offline_pages/offliner_helper.h"
#include "chrome/browser/offline_pages/offliner_user_data.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/renderer_preferences_util.h"
#include "chrome/browser/ssl/chrome_security_state_tab_helper.h"
#include "chrome/common/chrome_isolated_world_ids.h"
#include "components/content_settings/browser/page_specific_content_settings.h"
#include "components/offline_pages/core/background/offliner_policy.h"
#include "components/offline_pages/core/background/save_page_request.h"
#include "components/offline_pages/core/client_namespace_constants.h"
#include "components/offline_pages/core/offline_page_client_policy.h"
#include "components/offline_pages/core/offline_page_feature.h"
#include "components/offline_pages/core/offline_page_model.h"
#include "components/security_state/core/security_state.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/mhtml_extra_parts.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/web_contents.h"
#include "content/public/browser/web_contents_user_data.h"
#include "net/cert/cert_status_flags.h"
#include "net/http/http_response_headers.h"
namespace offline_pages {
namespace {
void HandleLoadTerminationCancel(
Offliner::CompletionCallback completion_callback,
const SavePageRequest& canceled_request) {
std::move(completion_callback)
.Run(canceled_request, Offliner::RequestStatus::FOREGROUND_CANCELED);
}
} // namespace
BackgroundLoaderOffliner::BackgroundLoaderOffliner(
content::BrowserContext* browser_context,
const OfflinerPolicy* policy,
OfflinePageModel* offline_page_model,
std::unique_ptr<LoadTerminationListener> load_termination_listener)
: browser_context_(browser_context),
offline_page_model_(offline_page_model),
policy_(policy),
load_termination_listener_(std::move(load_termination_listener)),
save_state_(NONE),
page_load_state_(SUCCESS),
network_bytes_(0LL),
is_low_bar_met_(false),
did_snapshot_on_last_retry_(false) {
DCHECK(offline_page_model_);
DCHECK(browser_context_);
// When the offliner is created for test harness runs, the
// |load_termination_listener_| will be set to nullptr, in order to prevent
// crashing, adding a check here.
if (load_termination_listener_)
load_termination_listener_->set_offliner(this);
for (int i = 0; i < ResourceDataType::RESOURCE_DATA_TYPE_COUNT; ++i) {
stats_[i].requested = 0;
stats_[i].completed = 0;
}
}
BackgroundLoaderOffliner::~BackgroundLoaderOffliner() {}
// static
BackgroundLoaderOffliner* BackgroundLoaderOffliner::FromWebContents(
content::WebContents* contents) {
Offliner* offliner = OfflinerUserData::OfflinerFromWebContents(contents);
// Today we only have one kind of offliner that uses OfflinerUserData. If we
// add other types, revisit this cast.
if (offliner)
return static_cast<BackgroundLoaderOffliner*>(offliner);
return nullptr;
}
bool BackgroundLoaderOffliner::LoadAndSave(
const SavePageRequest& request,
CompletionCallback completion_callback,
const ProgressCallback& progress_callback) {
DCHECK(completion_callback);
DCHECK(progress_callback);
DCHECK(offline_page_model_);
if (pending_request_) {
DVLOG(1) << "Already have pending request";
return false;
}
if (GetPolicy(request.client_id().name_space)
.requires_specific_user_settings &&
(AreThirdPartyCookiesBlocked(browser_context_) ||
IsNetworkPredictionDisabled(browser_context_))) {
DVLOG(1) << "WARNING: Unable to load when 3rd party cookies blocked or "
<< "prediction disabled";
return false;
}
if (!OfflinePageModel::CanSaveURL(request.url())) {
DVLOG(1) << "Not able to save page for requested url: " << request.url();
return false;
}
ResetLoader();
AttachObservers();
MarkLoadStartTime();
// Track copy of pending request.
pending_request_ = std::make_unique<SavePageRequest>(request);
completion_callback_ = std::move(completion_callback);
progress_callback_ = progress_callback;
// Load page attempt.
loader_.get()->LoadPage(request.url());
snapshot_controller_ = std::make_unique<BackgroundSnapshotController>(
base::SingleThreadTaskRunner::GetCurrentDefault(), this, false);
return true;
}
bool BackgroundLoaderOffliner::Cancel(CancelCallback callback) {
DCHECK(pending_request_);
// We ignore the case where pending_request_ is not set, but given the checks
// in RequestCoordinator this should not happen.
if (!pending_request_)
return false;
// TODO(chili): We are not able to cancel a pending
// OfflinePageModel::SaveSnapshot() operation. We will notify caller that
// cancel completed once the SavePage operation returns.
if (save_state_ != NONE) {
save_state_ = DELETE_AFTER_SAVE;
cancel_callback_ = std::move(callback);
return true;
}
// Post the cancel callback right after this call concludes.
base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE, base::BindOnce(std::move(callback), *pending_request_.get()));
ResetState();
return true;
}
void BackgroundLoaderOffliner::TerminateLoadIfInProgress() {
if (!pending_request_)
return;
Cancel(base::BindOnce(HandleLoadTerminationCancel,
std::move(completion_callback_)));
}
bool BackgroundLoaderOffliner::HandleTimeout(int64_t request_id) {
if (pending_request_) {
DCHECK(request_id == pending_request_->request_id());
if (is_low_bar_met_ && (pending_request_->started_attempt_count() + 1 >=
policy_->GetMaxStartedTries() ||
pending_request_->completed_attempt_count() + 1 >=
policy_->GetMaxCompletedTries())) {
// If we are already in the middle of a save operation, let it finish
// but do not return SAVED_ON_LAST_RETRY
if (save_state_ == NONE) {
did_snapshot_on_last_retry_ = true;
StartSnapshot();
}
return true;
}
}
return false;
}
void BackgroundLoaderOffliner::CanDownload(
base::OnceCallback<void(bool)> callback) {
if (!pending_request_.get()) {
std::move(callback).Run(false); // Shouldn't happen though...
return;
}
bool should_allow_downloads = false;
Offliner::RequestStatus final_status =
Offliner::RequestStatus::LOADING_FAILED_DOWNLOAD;
// Check whether we should allow file downloads for this save page request.
// If we want to proceed with the file download, fail with
// DOWNLOAD_THROTTLED. If we don't want to proceed with the file download,
// fail with LOADING_FAILED_DOWNLOAD.
if (GetPolicy(pending_request_.get()->client_id().name_space)
.allows_conversion_to_background_file_download) {
should_allow_downloads = true;
final_status = Offliner::RequestStatus::DOWNLOAD_THROTTLED;
}
std::move(callback).Run(should_allow_downloads);
SavePageRequest request(*pending_request_.get());
std::move(completion_callback_).Run(request, final_status);
base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE, base::BindOnce(&BackgroundLoaderOffliner::ResetState,
weak_ptr_factory_.GetWeakPtr()));
}
void BackgroundLoaderOffliner::MarkLoadStartTime() {
load_start_time_ = base::TimeTicks::Now();
}
void BackgroundLoaderOffliner::PrimaryMainDocumentElementAvailable() {
is_low_bar_met_ = true;
// Add this signal to signal_data_.
AddLoadingSignal("PrimaryMainDocumentElementAvailable");
}
void BackgroundLoaderOffliner::DocumentOnLoadCompletedInPrimaryMainFrame() {
if (!pending_request_.get()) {
DVLOG(1) << "DidStopLoading called even though no pending request.";
return;
}
// Add this signal to signal_data_.
AddLoadingSignal("DocumentOnLoadCompletedInPrimaryMainFrame");
snapshot_controller_->DocumentOnLoadCompletedInPrimaryMainFrame();
}
void BackgroundLoaderOffliner::PrimaryMainFrameRenderProcessGone(
base::TerminationStatus status) {
if (pending_request_) {
SavePageRequest request(*pending_request_.get());
switch (status) {
case base::TERMINATION_STATUS_OOM:
case base::TERMINATION_STATUS_PROCESS_CRASHED:
case base::TERMINATION_STATUS_STILL_RUNNING:
std::move(completion_callback_)
.Run(request, Offliner::RequestStatus::LOADING_FAILED_NO_NEXT);
break;
case base::TERMINATION_STATUS_PROCESS_WAS_KILLED:
default:
std::move(completion_callback_)
.Run(request, Offliner::RequestStatus::LOADING_FAILED);
}
ResetState();
}
}
void BackgroundLoaderOffliner::WebContentsDestroyed() {
if (pending_request_) {
SavePageRequest request(*pending_request_.get());
std::move(completion_callback_)
.Run(request, Offliner::RequestStatus::LOADING_FAILED);
ResetState();
}
}
void BackgroundLoaderOffliner::DidFinishNavigation(
content::NavigationHandle* navigation_handle) {
if (!navigation_handle->IsInPrimaryMainFrame())
return;
// If there was an error of any kind (certificate, client, DNS, etc),
// Mark as error page. Resetting here causes RecordNavigationMetrics to crash.
if (navigation_handle->IsErrorPage()) {
page_load_state_ = RETRIABLE_NET_ERROR;
} else {
int status_code = 200; // Default to OK.
// No response header can imply intermediate navigation state.
if (navigation_handle->GetResponseHeaders())
status_code = navigation_handle->GetResponseHeaders()->response_code();
// 2XX and 3XX are ok because they indicate success or redirection.
// We track 301 because it's MOVED_PERMANENTLY and usually accompanies an
// error page with new address.
// 400+ codes are client and server errors.
// We skip 418 because it's a teapot.
if (status_code == 301 || (status_code >= 400 && status_code != 418)) {
page_load_state_ = RETRIABLE_HTTP_ERROR;
}
}
}
void BackgroundLoaderOffliner::SetBackgroundSnapshotControllerForTest(
std::unique_ptr<BackgroundSnapshotController> controller) {
snapshot_controller_ = std::move(controller);
}
void BackgroundLoaderOffliner::ObserveResourceLoading(
ResourceLoadingObserver::ResourceDataType type,
bool started) {
// Add the signal to extra data, and use for tracking.
RequestStats& found_stats = stats_[type];
if (started)
++found_stats.requested;
else
++found_stats.completed;
}
void BackgroundLoaderOffliner::OnNetworkBytesChanged(int64_t bytes) {
if (pending_request_ && save_state_ != SAVING) {
network_bytes_ += bytes;
progress_callback_.Run(*pending_request_, network_bytes_);
}
}
void BackgroundLoaderOffliner::StartSnapshot() {
if (!pending_request_.get()) {
DVLOG(1) << "Pending request was cleared during delay.";
return;
}
DCHECK(is_low_bar_met_)
<< "Minimum quality must have been reached before saving a snapshot";
// Add this signal to signal_data_.
AddLoadingSignal("Snapshotting");
SavePageRequest request(*pending_request_.get());
// If there was an error navigating to page, return loading failed.
if (page_load_state_ != SUCCESS) {
Offliner::RequestStatus status;
switch (page_load_state_) {
case RETRIABLE_NET_ERROR:
status = Offliner::RequestStatus::LOADING_FAILED_NET_ERROR;
break;
case RETRIABLE_HTTP_ERROR:
status = Offliner::RequestStatus::LOADING_FAILED_HTTP_ERROR;
break;
case NONRETRIABLE:
status = Offliner::RequestStatus::LOADING_FAILED_NO_RETRY;
break;
default:
// We should've already checked for Success before entering here.
NOTREACHED_IN_MIGRATION();
status = Offliner::RequestStatus::LOADING_FAILED;
}
std::move(completion_callback_).Run(request, status);
ResetState();
return;
}
content::WebContents* web_contents(
content::WebContentsObserver::web_contents());
Offliner::RequestStatus loaded_page_error =
CanSavePageInBackground(web_contents);
if (loaded_page_error != Offliner::RequestStatus::UNKNOWN) {
std::move(completion_callback_).Run(request, loaded_page_error);
ResetState();
return;
}
save_state_ = SAVING;
std::unique_ptr<OfflinePageArchiver> archiver(new OfflinePageMHTMLArchiver());
OfflinePageModel::SavePageParams params;
params.url = web_contents->GetLastCommittedURL();
params.client_id = request.client_id();
params.proposed_offline_id = request.request_id();
params.is_background = true;
params.request_origin = request.request_origin();
// Pass in the original URL if it's different from last committed
// when redirects occur.
if (!request.original_url().is_empty())
params.original_url = request.original_url();
else if (params.url != request.url())
params.original_url = request.url();
offline_page_model_->SavePage(
params, std::move(archiver), web_contents,
base::BindOnce(&BackgroundLoaderOffliner::OnPageSaved,
weak_ptr_factory_.GetWeakPtr()));
}
void BackgroundLoaderOffliner::OnPageSaved(SavePageResult save_result,
int64_t offline_id) {
if (!pending_request_)
return;
SavePageRequest request(*pending_request_.get());
bool did_snapshot_on_last_retry = did_snapshot_on_last_retry_;
ResetState();
if (save_state_ == DELETE_AFTER_SAVE) {
// Delete the saved page off disk and from the OPM.
PageCriteria criteria;
criteria.offline_ids = std::vector<int64_t>{offline_id};
offline_page_model_->DeletePagesWithCriteria(
criteria,
base::BindOnce(&BackgroundLoaderOffliner::DeleteOfflinePageCallback,
weak_ptr_factory_.GetWeakPtr(), request));
save_state_ = NONE;
return;
}
save_state_ = NONE;
Offliner::RequestStatus save_status;
if (save_result == SavePageResult::ALREADY_EXISTS) {
save_status = RequestStatus::SAVED;
} else if (save_result == SavePageResult::SUCCESS) {
if (did_snapshot_on_last_retry)
save_status = RequestStatus::SAVED_ON_LAST_RETRY;
else
save_status = RequestStatus::SAVED;
} else {
save_status = RequestStatus::SAVE_FAILED;
}
std::move(completion_callback_).Run(request, save_status);
}
void BackgroundLoaderOffliner::DeleteOfflinePageCallback(
const SavePageRequest& request,
DeletePageResult result) {
std::move(cancel_callback_).Run(request);
}
void BackgroundLoaderOffliner::ResetState() {
pending_request_.reset();
// Stop snapshot controller from triggering any more events.
snapshot_controller_->Stop();
// Delete the snapshot controller after stack unwinds, so we don't
// corrupt stack in some edge cases. Deleting it soon should be safe because
// we check against pending_request_ with every action, and snapshot
// controller is configured to only call StartSnapshot once for BGL.
base::SingleThreadTaskRunner::GetCurrentDefault()->DeleteSoon(
FROM_HERE, snapshot_controller_.release());
page_load_state_ = SUCCESS;
network_bytes_ = 0LL;
is_low_bar_met_ = false;
did_snapshot_on_last_retry_ = false;
content::WebContentsObserver::Observe(nullptr);
loader_.reset();
for (int i = 0; i < ResourceDataType::RESOURCE_DATA_TYPE_COUNT; ++i) {
stats_[i].requested = 0;
stats_[i].completed = 0;
}
}
void BackgroundLoaderOffliner::ResetLoader() {
loader_ = std::make_unique<background_loader::BackgroundLoaderContents>(
browser_context_);
loader_->SetDelegate(this);
// Initialize web contents settings.
renderer_preferences_util::UpdateFromSystemSettings(
loader_->web_contents()->GetMutableRendererPrefs(),
Profile::FromBrowserContext(browser_context_));
content_settings::PageSpecificContentSettings::CreateForWebContents(
loader_->web_contents(),
std::make_unique<PageSpecificContentSettingsDelegate>(
loader_->web_contents()));
}
void BackgroundLoaderOffliner::AttachObservers() {
content::WebContents* contents = loader_->web_contents();
content::WebContentsObserver::Observe(contents);
OfflinerUserData::CreateForWebContents(contents, this);
}
void BackgroundLoaderOffliner::AddLoadingSignal(const char* signal_name) {
base::TimeTicks current_time = base::TimeTicks::Now();
base::TimeDelta delay_so_far = current_time - load_start_time_;
// We would prefer to use int64_t here, but JSON does not support that type.
// Given the choice between int and double, we choose to implicitly convert to
// a double since it maintains more precision (we can get a longer time in
// milliseconds than we can with a 2 bit int, 53 bits vs 32).
signal_data_.Set(signal_name, delay_so_far.InMillisecondsF());
}
void BackgroundLoaderOffliner::RenovationsCompleted() {
snapshot_controller_->RenovationsCompleted();
}
Offliner::RequestStatus BackgroundLoaderOffliner::CanSavePageInBackground(
content::WebContents* web_contents) {
DCHECK(is_low_bar_met_)
<< "Minimum quality must have been reached before checking loaded page";
std::unique_ptr<security_state::VisibleSecurityState> visible_security_state =
GetVisibleSecurityState(web_contents);
// Checks for HTTPS certificate errors (HTTP connections are not affected).
if (security_state::HasMajorCertificateError(*visible_security_state))
return Offliner::RequestStatus::LOADED_PAGE_HAS_CERTIFICATE_ERROR;
// Checks if the page is blocked by SafeBrowsing.
if (visible_security_state->malicious_content_status !=
security_state::MaliciousContentStatus::MALICIOUS_CONTENT_STATUS_NONE) {
return Offliner::RequestStatus::LOADED_PAGE_IS_BLOCKED;
}
// Don't save Chrome error or interstitial pages.
if (GetPageType(web_contents) != content::PageType::PAGE_TYPE_NORMAL)
return Offliner::RequestStatus::LOADED_PAGE_IS_CHROME_INTERNAL;
return Offliner::RequestStatus::UNKNOWN;
}
std::unique_ptr<security_state::VisibleSecurityState>
BackgroundLoaderOffliner::GetVisibleSecurityState(
content::WebContents* web_contents) {
// Note: this tab helper needs to be created here as in the background it is
// not created by default.
ChromeSecurityStateTabHelper::CreateForWebContents(web_contents);
SecurityStateTabHelper* helper =
SecurityStateTabHelper::FromWebContents(web_contents);
DCHECK(helper);
return helper->GetVisibleSecurityState();
}
content::PageType BackgroundLoaderOffliner::GetPageType(
content::WebContents* web_contents) {
DCHECK(web_contents->GetController().GetVisibleEntry())
<< "An entry must have committed at this WebContents";
return web_contents->GetController().GetVisibleEntry()->GetPageType();
}
} // namespace offline_pages