// Copyright 2016 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License.! #ifndef MODEL_INTERFACE_H_ #define MODEL_INTERFACE_H_ #include <memory> #include <set> #include <string> #include <utility> #include <vector> #include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "common.h" #include "normalizer.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "third_party/darts_clone/darts.h" #include "util.h" namespace sentencepiece { // "_this_is_a_pen" => ["_this", "_is", "_a", "_pen"] std::vector<absl::string_view> SplitIntoWords( absl::string_view text, bool treat_ws_as_suffix = false, bool allow_ws_only_pieces = false); // Converts byte (0-255) to piece (e.g., 58 -> "<0x3A>"). std::string ByteToPiece(unsigned char c); // Converts piece to byte (e.g., "<0x3A>" -> 58). Returns -1 if `piece` is not // a valid byte piece. int PieceToByte(absl::string_view piece); EncodeResult; NBestEncodeResult; class ModelProto; // Underlying model interface. // Given a normalized string, returns a sequence of sentence pieces with ids. class ModelInterface { … }; } // namespace sentencepiece #endif // MODEL_INTERFACE_H_