// Copyright 2011 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/40284755): Remove this and spanify to fix the errors. #pragma allow_unsafe_buffers #endif #ifndef BASE_STRINGS_STRING_TOKENIZER_H_ #define BASE_STRINGS_STRING_TOKENIZER_H_ #include <algorithm> #include <string> #include <string_view> #include "base/check.h" #include "base/strings/string_util.h" namespace base { // StringTokenizerT is a simple string tokenizer class. It works like an // iterator that with each step (see the GetNext method) updates members that // refer to the next token in the input string. The user may optionally // configure the tokenizer to return delimiters. For the optional // WhitespacePolicy parameter, kSkipOver will cause the tokenizer to skip // over whitespace characters. The tokenizer never stops on a whitespace // character. // // EXAMPLE 1: // // char input[] = "this is a test"; // CStringTokenizer t(input, input + strlen(input), " "); // while (t.GetNext()) { // printf("%s\n", t.token().c_str()); // } // // Output: // // this // is // a // test // // // EXAMPLE 2: // // std::string input = "no-cache=\"foo, bar\", private"; // StringTokenizer t(input, ", "); // t.set_quote_chars("\""); // while (t.GetNext()) { // printf("%s\n", t.token().c_str()); // } // // Output: // // no-cache="foo, bar" // private // // // EXAMPLE 3: // // bool next_is_option = false, next_is_value = false; // std::string input = "text/html; charset=UTF-8; foo=bar"; // StringTokenizer t(input, "; ="); // t.set_options(StringTokenizer::RETURN_DELIMS); // while (t.GetNext()) { // if (t.token_is_delim()) { // switch (*t.token_begin()) { // case ';': // next_is_option = true; // break; // case '=': // next_is_value = true; // break; // } // } else { // const char* label; // if (next_is_option) { // label = "option-name"; // next_is_option = false; // } else if (next_is_value) { // 
label = "option-value"; // next_is_value = false; // } else { // label = "mime-type"; // } // printf("%s: %s\n", label, t.token().c_str()); // } // } // // // EXAMPLE 4: // // std::string input = "this, \t is, \t a, \t test"; // StringTokenizer t(input, ",", // StringTokenizer::WhitespacePolicy::kSkipOver); // while (t.GetNext()) { // printf("%s\n", t.token().c_str()); // } // // Output: // // this // is // a // test // // // TODO(danakj): This class is templated on the container and the iterator type, // but it strictly only needs to care about the `CharType`. However many users // expect to work with string and string::iterator for historical reasons. When // they are all working with `string_view`, then this class can be made to // unconditionally use `std::basic_string_view<CharType>` and vend iterators of // that type, and we can drop the `str` and `const_iterator` aliases. template <class str, class const_iterator> class StringTokenizerT { … }; StringTokenizer; StringViewTokenizer; String16Tokenizer; CStringTokenizer; } // namespace base #endif // BASE_STRINGS_STRING_TOKENIZER_H_