// Copyright 2024 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "components/url_deduplication/docs_url_strip_handler.h" #include <string> #include <vector> #include "base/containers/fixed_flat_set.h" #include "base/containers/lru_cache.h" #include "base/no_destructor.h" #include "base/strings/escape.h" #include "base/strings/string_util.h" #include "components/url_formatter/url_formatter.h" #include "net/base/url_util.h" #include "third_party/re2/src/re2/re2.h" #include "url/gurl.h" // TODO(crbug.com/353966074) There is a plan to avoid/consolidate any // duplicated code as this borrows from: // components/omnibox/browser/document_provider.cc namespace { // Verify if the host could possibly be for a valid doc URL. This is a more // lightweight check than `ExtractDocIdFromUrl()`. It can be done before // unescaping the URL as valid hosts don't contain escapable chars; unescaping // is relatively expensive. E.g., 'docs.google.com' isn't a valid doc URL, but // its host looks like it could be, so return true. On the other hand, // 'google.com' is definitely not a doc URL so return false. bool ValidHostPrefix(const std::string& host) { … } // Derived from google3/apps/share/util/docs_url_extractor.cc. std::string ExtractDocIdFromUrl(const std::string& url) { … } } // namespace namespace url_deduplication { GURL DocsURLStripHandler::StripExtraParams(GURL url) { … } } // namespace url_deduplication