chromium/third_party/tensorflow_models/src/research/seq_flow_lite/tf_ops/projection_util.h

/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_SEQ_FLOW_LITE_TF_OPS_PROJECTION_UTIL_H_
#define TENSORFLOW_MODELS_SEQ_FLOW_LITE_TF_OPS_PROJECTION_UTIL_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "icu4c/source/common/unicode/utf8.h"

// Integer offset constants. Judging only by their names, they relate to
// first-letter-capitalized, all-caps, word-novelty and document-size signals;
// presumably each is the position of that signal within the projection
// output -- TODO confirm against the code that consumes these constants.
// NOTE(review): no initializer value is present on these lines as written.
inline constexpr int kFirstCapOffset =;
inline constexpr int kAllCapsOffset =;
inline constexpr int kWordNoveltyOffset =;
inline constexpr int kDocSizeOffset =;

// Character-array constants, one per hash variant. Judging by the identifiers
// these cover a murmur hash, "xfix" hashes of width 8/16/32 and "unicode"
// hashes of width 8/16; presumably they are the string names used to select a
// hash engine -- TODO confirm against the code that reads them.
// NOTE(review): no initializer value is present on these lines as written.
inline constexpr char kMurmurHash[] =;
inline constexpr char kXfixHash8[] =;
inline constexpr char kXfixHash16[] =;
inline constexpr char kXfixHash32[] =;
inline constexpr char kUnicodeHash8[] =;
inline constexpr char kUnicodeHash16[] =;

// A base class that specifies the interface for a hash engine used by the
// projection operator.
// NOTE(review): no members are declared in the body shown here, so the actual
// interface (virtual methods, destructor) cannot be documented from this view.
class HashEngine {};

// A hashing wrapper class that can hash a string and generate a hash code with
// the requested number of features (two bit values). Some of the
// implementations are copied from murmurhash.
// NOTE(review): "two bit values" is ambiguous as written -- it may mean
// two-bit values per feature; confirm against the implementation.
// NOTE(review): no members are declared in the body shown here, so the
// construction and hashing entry points cannot be documented from this view.
class Hasher {};

// Unicode processor for the TensorFlow and TFLite string projection ops.
// NOTE(review): no members are declared in the body shown here; presumably the
// full class builds on the ICU UTF-8 helpers included above -- TODO confirm.
class ProjectionUnicodeHandler {};

// size_t constants. Judging by the names they are sentinel limits meaning
// "no cap on input length" and "no cap on token count"; presumably intended
// as arguments for the max_input / max_tokens parameters of the split
// functions declared below -- TODO confirm.
// NOTE(review): no initializer value is present on these lines as written.
// NOTE(review): size_t is used unqualified and <cstddef> is not included
// directly by this header; it is presumably reached through another include.
inline constexpr size_t kEntireString =;
inline constexpr size_t kAllTokens =;

// Declaration only; the definition is not in this header.
// Returns a vector of strings derived from the character buffer `input_ptr`.
// The name indicates tokens are separated on spaces; the exact whitespace
// rules are not visible here.
//   input_ptr:  start of the input buffer.
//   len:        presumably the buffer length in bytes -- TODO confirm.
//   max_input:  presumably an upper bound on how much input is consumed --
//               TODO confirm units (bytes vs. characters).
//   max_tokens: presumably an upper bound on the number of tokens returned --
//               TODO confirm.
std::vector<std::string> SplitBySpace(const char* input_ptr, size_t len,
                                      size_t max_input, size_t max_tokens);

// Declaration only; the definition is not in this header.
// Returns a vector of strings derived from the character buffer `input_ptr`.
// The name indicates one token per character; whether "character" means a
// byte or a UTF-8 code point is not visible here -- TODO confirm.
//   input_ptr:  start of the input buffer.
//   len:        presumably the buffer length in bytes -- TODO confirm.
//   max_tokens: presumably an upper bound on the number of tokens returned --
//               TODO confirm.
std::vector<std::string> SplitByChar(const char* input_ptr, size_t len,
                                     size_t max_tokens);

// Declaration only; the definition is not in this header.
// Builds a single string from `words`, a list of (pointer, size) pairs. The
// name indicates the pieces are joined with spaces; presumably each pair is a
// non-owning (data, length) view of one word -- TODO confirm.
// NOTE(review): `words` is taken by value, so the vector is copied at each
// call unless the caller moves it in; changing this would require updating
// the out-of-line definition as well.
// NOTE(review): std::pair is used but <utility> is not included directly by
// this header; it is presumably reached through another include.
std::string JoinPairsBySpace(std::vector<std::pair<const char*, size_t>> words);

#endif  // TENSORFLOW_MODELS_SEQ_FLOW_LITE_TF_OPS_PROJECTION_UTIL_H_