// Copyright 2014 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/40285824): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif #include "components/query_parser/snippet.h" #include <stddef.h> #include <algorithm> #include "base/strings/string_split.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" namespace query_parser { namespace { // A sample document to compute snippets of. // The \x bits after the first "Google" are UTF-8 of U+2122 TRADE MARK SIGN, // and are useful for verifying we don't screw up in UTF-8/UTF-16 conversion. const char kSampleDocument[] = …; } // namespace // Thai sample taken from http://www.google.co.th/intl/th/privacy.html // TODO(jungshik) : Add more samples (e.g. Hindi) after porting // ICU 4.0's character iterator changes to our copy of ICU 3.8 to get // grapheme clusters in Indic scripts handled more reasonably. const char* kThaiSample = …; // Comparator for sorting by the first element in a pair. bool ComparePair1st(const Snippet::MatchPosition& a, const Snippet::MatchPosition& b) { … } // For testing, we'll compute the match positions manually instead of using // sqlite's FTS matching. BuildSnippet returns the snippet for matching // |query| against |document|. Matches are surrounded by "**". std::u16string BuildSnippet(const std::string& document, const std::string& query) { … } TEST(Snippets, SimpleQuery) { … } // Test that two words that are near each other don't produce two elided bits. TEST(Snippets, NearbyWords) { … } // The above tests already test that we get byte offsets correct, but here's // one that gets the "TM" in its snippet. TEST(Snippets, UTF8) { … } TEST(Snippets, ThaiUTF8) { … } TEST(Snippets, ExtractMatchPositions) { … } } // namespace query_parser