#include "tensorflow_lite_support/cc/text/tokenizers/regex_tokenizer.h"
#include <iostream>
#include "absl/strings/str_cat.h"
#include "absl/strings/substitute.h"
#include "tensorflow_lite_support/cc/utils/common_utils.h"
namespace tflite {
namespace support {
namespace text {
namespace tokenizer {
namespace {
// Names of special vocabulary tokens. Initializers are not visible in this
// view; presumably these are the literal token strings (e.g. "<START>",
// "<PAD>", "<UNKNOWN>") looked up by the Get*Token accessors below —
// TODO(review): confirm against the full file.
constexpr char kStart[] = …;
constexpr char kPad[] = …;
constexpr char kUnknown[] = …;
// Builds the inverse mapping (id -> token) from a token -> id map, writing
// into *index_token_map. The values are string_views that alias the keys of
// token_index_map, so the source map must outlive the output map
// (node_hash_map provides pointer/reference stability for its elements).
void buildIndexTokenMap(
const absl::node_hash_map<std::string, int>& token_index_map,
absl::node_hash_map<int, absl::string_view>* index_token_map) { … }
}
// Constructs a tokenizer that splits input with `regex_pattern` and maps the
// resulting tokens to ids using the vocabulary file at `path_to_vocab`.
// Member-initializer list and body are elided in this view — presumably it
// loads the vocab via common_utils and builds the index->token map with
// buildIndexTokenMap; TODO(review): confirm against the full file.
RegexTokenizer::RegexTokenizer(const std::string& regex_pattern,
const std::string& path_to_vocab)
: … { … }
// Overload taking the vocabulary as an in-memory buffer instead of a file
// path (useful when the vocab is embedded in the model's metadata).
// `vocab_buffer_data` points to `vocab_buffer_size` bytes; the tokenizer is
// presumed to copy what it needs during construction — TODO(review): confirm
// the buffer need not outlive this object.
RegexTokenizer::RegexTokenizer(const std::string& regex_pattern,
const char* vocab_buffer_data,
size_t vocab_buffer_size)
: … { … }
TokenizerResult RegexTokenizer::Tokenize(const std::string& input) { … }
bool RegexTokenizer::LookupId(absl::string_view key, int* result) const { … }
bool RegexTokenizer::LookupWord(int vocab_id, absl::string_view* result) const { … }
bool RegexTokenizer::GetStartToken(int* start_token) { … }
bool RegexTokenizer::GetPadToken(int* pad_token) { … }
bool RegexTokenizer::GetUnknownToken(int* unknown_token) { … }
}
}
}
}