#include "mediapipe/tasks/cc/text/tokenizers/bert_tokenizer.h"
#include <cstdint>
#include "tensorflow_text/core/kernels/regex_split.h"
namespace mediapipe {
namespace tasks {
namespace text {
namespace tokenizers {
// Constructs the wordpiece vocabulary backing store from `vocab`.
// NOTE(review): the member-initializer list and body are elided ("…") in this
// view — presumably `vocab` is copied/indexed into a flat hash map for the
// Lookup* methods below; confirm against the full source before relying on
// this description.
FlatHashMapBackedWordpiece::FlatHashMapBackedWordpiece(
const std::vector<std::string>& vocab)
: … { … }
// Vocabulary-membership query in the tensorflow_text wordpiece-vocab style:
// `key` is the candidate token and `value` is an out-parameter.
// NOTE(review): body elided ("…") in this view — semantics (how *value is set,
// what LookupStatus is returned) are inferred from the signature only; verify
// against the tensorflow_text WordpieceVocab contract and the full source.
tensorflow::text::LookupStatus FlatHashMapBackedWordpiece::Contains(
absl::string_view key, bool* value) const { … }
// Token -> vocabulary-id lookup; `result` is an out-parameter and the bool
// return presumably signals whether `key` was found.
// NOTE(review): body elided ("…") in this view — the success/failure
// convention is inferred from the signature; confirm against the full source.
bool FlatHashMapBackedWordpiece::LookupId(const absl::string_view key,
int* result) const { … }
// Reverse lookup: vocabulary id -> token text, written through `result`.
// NOTE(review): body elided ("…") in this view. Because `result` is an
// absl::string_view, the returned view must alias storage owned by this
// object — callers should not outlive the tokenizer; confirm lifetime in the
// full source. The bool return presumably indicates `vocab_id` was in range.
bool FlatHashMapBackedWordpiece::LookupWord(int vocab_id,
absl::string_view* result) const { … }
TokenizerResult BertTokenizer::Tokenize(const std::string& input) { … }
// Full wordpiece tokenization of `input`, returning the richer
// WordpieceTokenizerResult.
// NOTE(review): body elided ("…") in this view — given the
// tensorflow_text regex_split.h include, this presumably regex-splits the
// input and then runs wordpiece lookup against the vocab above; confirm
// against the full source.
WordpieceTokenizerResult BertTokenizer::TokenizeWordpiece(
const std::string& input) const { … }
}  // namespace tokenizers
}  // namespace text
}  // namespace tasks
}  // namespace mediapipe