// ngram_hash_ops_utils.cc

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/language_detection/core/ngram_hash_ops_utils.h"

#include <cstring>

#include "third_party/utf/src/include/utf.h"

namespace language_detection {

// File-level string constants (constexpr char arrays) declared inside the
// language_detection namespace.
// NOTE(review): the initializer values are elided in this view. Judging by
// the names only, these are presumably the marker strings placed before and
// after the tokenized text and the stand-in used for replaced tokens —
// confirm against the full source before relying on that.
constexpr char kPrefix[] = …;
constexpr char kSuffix[] = …;
constexpr char kReplacementToken[] = …;

// Special member functions of TokenizedOutput. All three are compiler-
// generated (`= default`) but are defined out-of-line in this translation
// unit rather than at their declaration.

// Default constructor: default-initializes every member.
TokenizedOutput::TokenizedOutput() = default;

// Destructor: destroys every member; no custom cleanup is performed here.
TokenizedOutput::~TokenizedOutput() = default;

// Copy constructor: member-wise copy of `rhs`.
TokenizedOutput::TokenizedOutput(const TokenizedOutput& rhs) = default;

// Tokenizes the buffer starting at `input_str` and returns the result by
// value as a TokenizedOutput.
//
// Parameters (as declared in the signature):
//   input_str                    - pointer to the text to tokenize.
//   len                          - size of the buffer at `input_str`.
//   max_tokens                   - limit on the number of tokens.
//   exclude_nonalphaspace_tokens - flag selecting how tokens that are not
//                                  alphabetic/space are treated.
//
// NOTE(review): the function body is elided in this view, so the exact
// semantics are unverified. Presumably `input_str` is UTF-8 and `len` is a
// byte count (this file includes utf.h), and kPrefix / kSuffix /
// kReplacementToken are used while building the output — confirm against the
// implementation and the header comment.
TokenizedOutput Tokenize(const char* input_str,
                         size_t len,
                         size_t max_tokens,
                         bool exclude_nonalphaspace_tokens) { … }

// Lowercases the text at `input_str` and delivers the result through the
// `output_str` out-parameter; nothing is returned.
//
// Parameters (as declared in the signature):
//   input_str  - pointer to the text to lowercase.
//   len        - size of the buffer at `input_str`.
//   output_str - destination string supplied by the caller.
//
// NOTE(review): the function body is elided in this view. Presumably the
// input is UTF-8 and `len` is a byte count, and `output_str` must be
// non-null; whether the output is overwritten or appended to cannot be
// determined from here — confirm against the implementation.
// NOTE(review): `len` is a signed `int` here, whereas Tokenize() takes its
// length as `size_t`; callers passing a size_t will narrow implicitly.
void LowercaseUnicodeStr(const char* input_str,
                         int len,
                         std::string* output_str) { … }

}  // namespace language_detection
// chromium/components/language_detection/core/ngram_hash_ops_utils.cc