// Copyright 2014 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/40284755): Remove this and spanify to fix the errors. #pragma allow_unsafe_buffers #endif // All data that is passed through a WebSocket with type "Text" needs to be // validated as UTF8. Since this is done on the IO thread, it needs to be // reasonably fast. // We are only interested in the performance on valid UTF8. Invalid UTF8 will // result in a connection failure, so is unlikely to become a source of // performance issues. #include "base/i18n/streaming_utf8_validator.h" #include <stddef.h> #include <string> #include <string_view> #include "base/functional/bind.h" #include "base/functional/callback.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" #include "base/test/perf_time_logger.h" #include "testing/gtest/include/gtest/gtest.h" namespace base { namespace { // We want to test ranges of valid UTF-8 sequences. These ranges are inclusive. // They are intended to be large enough that the validator needs to do // meaningful work while being in some sense "realistic" (eg. control characters // are not included). const char kOneByteSeqRangeStart[] = …; // U+0020 const char kOneByteSeqRangeEnd[] = …; // U+007E const char kTwoByteSeqRangeStart[] = …; // U+00A0 non-breaking space const char kTwoByteSeqRangeEnd[] = …; // U+024F small y with stroke const char kThreeByteSeqRangeStart[] = …; // U+3042 Hiragana "a" const char kThreeByteSeqRangeEnd[] = …; // U+9FC3 "to blink" const char kFourByteSeqRangeStart[] = …; // U+2000B const char kFourByteSeqRangeEnd[] = …; // U+2A6B2 // The different lengths of strings to test. const size_t kTestLengths[] = …; // Simplest possible byte-at-a-time validator, to provide a baseline // for comparison. 
This is only tried on 1-byte UTF-8 sequences, as // the results will not be meaningful with sequences containing // top-bit-set bytes. bool IsString7Bit(const std::string& s) { … } // Assumes that |previous| is a valid UTF-8 sequence, and attempts to return // the next one. Is just barely smart enough to iterate through the ranges // defined above. std::string NextUtf8Sequence(const std::string& previous) { … } TestTargetType; // Run function |target| over |test_string| |times| times, and report the results // using |description|. bool RunTest(const std::string& description, TestTargetType target, const std::string& test_string, int times) { … } // Construct a string by repeating |input| enough times to equal or exceed // |length|. std::string ConstructRepeatedTestString(const std::string& input, size_t length) { … } // Construct a string by expanding the range of UTF-8 sequences // between |input_start| and |input_end|, inclusive, and then // repeating the resulting string until it equals or exceeds |length| // bytes. |input_start| and |input_end| must be valid UTF-8 // sequences. std::string ConstructRangedTestString(const std::string& input_start, const std::string& input_end, size_t length) { … } struct TestFunctionDescription { … }; bool IsStringUTF8(const std::string& str) { … } // IsString7Bit is intentionally placed last so it can be excluded easily. const TestFunctionDescription kTestFunctions[] = …; // Construct a test string from |construct_test_string| for each of the lengths // in |kTestLengths| in turn. For each string, run each test in |test_functions| // for a number of iterations such that the total number of bytes validated // is around 16MB. 
void RunSomeTests( const char format[], base::RepeatingCallback<std::string(size_t length)> construct_test_string, const TestFunctionDescription* test_functions, size_t test_count) { … } TEST(StreamingUtf8ValidatorPerfTest, OneByteRepeated) { … } TEST(StreamingUtf8ValidatorPerfTest, OneByteRange) { … } TEST(StreamingUtf8ValidatorPerfTest, TwoByteRepeated) { … } TEST(StreamingUtf8ValidatorPerfTest, TwoByteRange) { … } TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRepeated) { … } TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRange) { … } TEST(StreamingUtf8ValidatorPerfTest, FourByteRepeated) { … } TEST(StreamingUtf8ValidatorPerfTest, FourByteRange) { … } } // namespace } // namespace base