// Copyright 2016 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License.! #ifndef NORMALIZER_NORMALIZER_H_ #define NORMALIZER_NORMALIZER_H_ #include <memory> #include <set> #include <string> #include <utility> #include <vector> #include "absl/strings/string_view.h" #include "common.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "third_party/darts_clone/darts.h" namespace sentencepiece { namespace normalizer { // Given a list of strings, finds the longest string which is a // prefix of a query. class PrefixMatcher { … }; // Normalizer implements a simple text normalizer with // user-defined string-to-string rules and leftmost longest // matching. The rules of Normalizer are built with // Builder::CompileCharsMap() method. Pre-compiled rules are // also available via Builder::GetPrecompiledCharsMap(<name>) method. // // The motivation of Normalizer is to make flexible, user-customizable // and self-contained normalizer. All the logic of normalization is // encoded in the model proto which allows us to define language/task // dependent normalization rules without breaking the default rule. class Normalizer { … }; } // namespace normalizer } // namespace sentencepiece #endif // NORMALIZER_NORMALIZER_H_