#include "nnet_language_identifier.h"
#include <math.h>
#include <algorithm>
#include <limits>
#include <string>
#include "base.h"
#include "embedding_network.h"
#include "registry.h"
#include "relevant_script_feature.h"
#include "script_span/generated_ulscript.h"
#include "script_span/getonescriptspan.h"
#include "script_span/text_processing.h"
#include "cld_3/protos/sentence.pb.h"
#include "sentence_features.h"
#include "task_context.h"
#include "workspace.h"
namespace chrome_lang_id {
namespace {
// File-local aggregate (declared inside the anonymous namespace).
// NOTE(review): the members are not visible in this view; the name suggests
// per-language statistics accumulated over chunks of text -- confirm against
// the full definition before relying on this description.
struct LangChunksStats { … };
// Binary predicate over two (string, float) pairs, taken by const reference.
// NOTE(review): the body is not visible here; the name suggests it returns
// true when x.second is greater than y.second, i.e. a comparator for sorting
// by the float component in descending order -- confirm, including how ties
// are handled.
bool OrderBySecondDescending(const std::pair<string, float> &x,
const std::pair<string, float> &y) { … }
// Returns a bool for a (language, probability) pair.
// NOTE(review): the body is not visible here; presumably compares
// |probability| against a reliability threshold that may depend on
// |language| (this file defines kReliabilityThreshold and
// kReliabilityHrBsThreshold) -- confirm which threshold applies to which
// language and whether the comparison is strict.
bool ResultIsReliable(const string &language, float probability) { … }
// Computes an int byte count from |text|.
// NOTE(review): the body is not visible here; the name suggests it returns
// the length of a prefix of |text| that is valid to process (possibly valid
// UTF-8 and/or capped by a maximum input size) -- confirm the exact rule and
// the return value for empty input.
int FindNumValidBytesToProcess(const string &text) { … }
}
// Out-of-class definitions of NNetLanguageIdentifier's static constants.
// The initializer values are not visible in this view; the notes below are
// inferred from the names only and should be confirmed against the header.

// Presumably lower/upper bounds on the number of bytes used for a prediction.
const int NNetLanguageIdentifier::kMinNumBytesToConsider = …;
const int NNetLanguageIdentifier::kMaxNumBytesToConsider = …;
// Presumably a cap on how many bytes of the raw input are examined at all.
const int NNetLanguageIdentifier::kMaxNumInputBytesToConsider = …;
// Presumably the number of snippets sampled from long inputs.
const int NNetLanguageIdentifier::kNumSnippets = …;
// Presumably the language code reported when no language can be identified.
const char NNetLanguageIdentifier::kUnknown[] = …;
// Presumably probability thresholds for marking a result reliable; the
// "HrBs" variant looks specific to Croatian/Bosnian -- TODO confirm.
const float NNetLanguageIdentifier::kReliabilityThreshold = …;
const float NNetLanguageIdentifier::kReliabilityHrBsThreshold = …;
// Const member function returning a string by value.
// NOTE(review): the body is not visible here; the name suggests it returns
// the prefix used for this extractor's argument/parameter names -- confirm
// the literal value against the full source.
const string LanguageIdEmbeddingFeatureExtractor::ArgPrefix() const { … }
// Default constructor.
// NOTE(review): the initializer list and body are not visible here;
// presumably delegates to or mirrors the (min_num_bytes, max_num_bytes)
// constructor using the class's default byte limits -- confirm.
NNetLanguageIdentifier::NNetLanguageIdentifier()
: … { … }
// File-local function returning a WholeSentenceFeature pointer.
// NOTE(review): the body is not visible here; presumably creates a new
// feature object for registration under a "cbog"-style feature name. The
// concrete type and who owns the returned pointer are not visible -- confirm.
static WholeSentenceFeature *cbog_factory() { … }
// File-local function returning a WholeSentenceFeature pointer.
// NOTE(review): the body is not visible here; "rsf" presumably refers to the
// relevant-script feature (see relevant_script_feature.h). The concrete type
// and who owns the returned pointer are not visible -- confirm.
static WholeSentenceFeature *rsf_factory() { … }
// File-local function returning a WholeSentenceFeature pointer.
// NOTE(review): the body is not visible here; "sf" presumably refers to a
// script feature. The concrete type and who owns the returned pointer are
// not visible -- confirm.
static WholeSentenceFeature *sf_factory() { … }
// Constructs an identifier from two int byte-count parameters.
// NOTE(review): the initializer list and body are not visible here;
// presumably |min_num_bytes| and |max_num_bytes| bound how much text is used
// for a prediction, and the feature factories defined above in this file are
// registered here -- confirm, including any validation of the arguments
// (e.g. min <= max).
NNetLanguageIdentifier::NNetLanguageIdentifier(int min_num_bytes,
int max_num_bytes)
: … { … }
// Destructor.
// NOTE(review): the body is not visible in this view, so what (if anything)
// it releases cannot be stated here.
NNetLanguageIdentifier::~NNetLanguageIdentifier() { … }
// Takes a mutable TaskContext pointer and returns nothing.
// NOTE(review): the body is not visible here; presumably configures the
// feature extractor from |context| and is meant to run before Init() --
// confirm the required call order and whether |context| may be null.
void NNetLanguageIdentifier::Setup(TaskContext *context) { … }
// Takes a mutable TaskContext pointer and returns nothing.
// NOTE(review): the body is not visible here; presumably finishes
// initializing the feature extractor from |context| after Setup() --
// confirm the required call order and whether |context| may be null.
void NNetLanguageIdentifier::Init(TaskContext *context) { … }
// Const member taking a mutable Sentence pointer and a pointer to a vector
// of FeatureVector; returns nothing.
// NOTE(review): the body is not visible here; presumably extracts features
// from |sentence| and writes them to |features| as an output parameter --
// confirm whether |features| is cleared, appended to, or overwritten, and
// whether |sentence| is modified.
void NNetLanguageIdentifier::GetFeatures(
Sentence *sentence, std::vector<FeatureVector> *features) const { … }
// Const member mapping an int |language_id| to a string returned by value.
// NOTE(review): the body is not visible here; presumably looks up the
// language code for a model output index -- confirm the valid range of
// |language_id| and the behavior for out-of-range values.
string NNetLanguageIdentifier::GetLanguageName(int language_id) const { … }
// Non-const member returning a Result by value for the given |text|.
// NOTE(review): the body is not visible here; presumably the main entry
// point that cleans up/truncates |text| and returns the most likely
// language -- confirm how invalid UTF-8, empty input and too-short input are
// handled (kUnknown is defined in this file and may be the fallback).
NNetLanguageIdentifier::Result NNetLanguageIdentifier::FindLanguage(
const string &text) { … }
// Non-const member returning a Result by value for the given |text|.
// NOTE(review): the body is not visible here; the name suggests callers must
// pass text that is already valid UTF-8 and that this runs the model
// prediction itself -- confirm the precondition and whether it is checked.
NNetLanguageIdentifier::Result NNetLanguageIdentifier::FindLanguageOfValidUTF8(
const string &text) { … }
// Non-const member returning a vector of Result by value for |text| and an
// int |num_langs|.
// NOTE(review): the body is not visible here; the name suggests it returns
// up to |num_langs| results for the most frequent languages in |text|.
// Confirm the ordering, whether the vector is padded when fewer languages
// are found, and the behavior for num_langs <= 0.
std::vector<NNetLanguageIdentifier::Result>
NNetLanguageIdentifier::FindTopNMostFreqLangs(const string &text,
int num_langs) { … }
// Takes a CLD2::LangSpan by const reference and returns a string by value.
// NOTE(review): the body is not visible here; presumably selects the portion
// of the span's text to feed to the model, possibly via
// SelectTextGivenBeginAndSize() -- confirm.
string NNetLanguageIdentifier::SelectTextGivenScriptSpan(
const CLD2::LangSpan &script_span) { … }
// Takes a pointer to the start of a character buffer and an int size, and
// returns a string by value.
// NOTE(review): the body is not visible here; presumably returns the whole
// buffer when it is short and a sampled/truncated selection when it is long
// (kNumSnippets and kMaxNumBytesToConsider are defined in this file).
// Confirm that |text_size| is in bytes and that |text_begin| need not be
// NUL-terminated.
string NNetLanguageIdentifier::SelectTextGivenBeginAndSize(
const char *text_begin, int text_size) { … }
}