docs_url_strip_handler.cc | Explore in Territory

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_deduplication/docs_url_strip_handler.h"

#include <string>
#include <vector>

#include "base/containers/fixed_flat_set.h"
#include "base/containers/lru_cache.h"
#include "base/no_destructor.h"
#include "base/strings/escape.h"
#include "base/strings/string_util.h"
#include "components/url_formatter/url_formatter.h"
#include "net/base/url_util.h"
#include "third_party/re2/src/re2/re2.h"
#include "url/gurl.h"

// TODO(crbug.com/353966074) There is a plan to avoid/consolidate any
//  duplicated code as this borrows from:
//  components/omnibox/browser/document_provider.cc
namespace {
// Verify if the host could possibly be for a valid doc URL. This is a more
// lightweight check than `ExtractDocIdFromUrl()`. It can be done before
// unescaping the URL as valid hosts don't contain escapable chars; unescaping
// is relatively expensive. E.g., 'docs.google.com' isn't a valid doc URL, but
// its host looks like it could be, so return true. On the other hand,
// 'google.com' is definitely not a doc URL so return false.
//
// Returns true if `host` may belong to a doc URL. A true result is only a
// hint: it does not guarantee that the full URL is a valid doc URL.
bool ValidHostPrefix(const std::string& host) { … }

// Derived from google3/apps/share/util/docs_url_extractor.cc.
//
// This is the heavier of the two doc-URL checks in this file;
// `ValidHostPrefix()` is the lightweight pre-check.
// NOTE(review): presumably returns the document ID embedded in `url`, and an
// empty string when `url` is not a valid doc URL -- confirm against the
// implementation and callers.
std::string ExtractDocIdFromUrl(const std::string& url) { … }
}  // namespace

namespace url_deduplication {

// Takes `url` by value and returns a GURL.
// NOTE(review): presumably maps a Google Docs URL to a stripped form without
// extra parameters so that URLs for the same document deduplicate -- confirm
// what is returned for URLs that are not doc URLs.
GURL DocsURLStripHandler::StripExtraParams(GURL url) { … }

}  // namespace url_deduplication
chromium/components/url_deduplication/docs_url_strip_handler.cc