// bdict_writer.cc | Explore in Territory

// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "third_party/hunspell/google/bdict_writer.h"

#include <stddef.h>
#include <stdint.h>

#include "base/check.h"
#include "base/containers/span.h"
#include "base/strings/stringprintf.h"
#include "third_party/hunspell/google/bdict.h"

namespace hunspell {

// Represents one node of the word trie in memory. This does not have to be
// very efficient since it is only used when building.
class DicNode { … };

namespace {

// Forward declaration; the definition appears further down in this anonymous
// namespace, after the per-node-type serializers.
// NOTE(review): presumably declared early so those serializers can call it on
// child nodes -- confirm against their bodies.
void SerializeTrie(const DicNode* node, std::string* output);

// Returns true if the nth character in the given |word| is |ch|, and false
// when there is no nth character. Note that this will also match an implicit
// NUL terminator at the end of the string.
// NOTE(review): presumably "the end of the string" means n == word.length()
// with ch == 0 -- confirm against the body.
bool NthCharacterIs(const std::string& word, size_t n, char ch) { … }

// Recursively builds the trie data structure for the range in the |words|
// list in [begin, end). It is assumed that all words in that range will have
// the same |node_depth - 2| characters at the beginning. This node will key
// off of the |node_depth - 1| character, with a special case for the root.
//
// |prefix_chars| is how deep this node is in the trie (and corresponds to how
// many letters of the word we will skip). The root level will have
// |prefix_chars| of 0.
// NOTE(review): the signature has no |prefix_chars| parameter; the paragraph
// above presumably describes |node_depth| under an older name -- confirm and
// rename in the comment.
//
// The given |node| will be filled with the data. The return value is the
// index into the |words| vector of the next word to process. It will be
// equal to |end| when all words have been consumed.
size_t BuildTrie(const BDictWriter::WordList& words,
                 size_t begin, size_t end,
                 size_t node_depth, DicNode* node) { … }

// Lookup tables are complicated. They can have a magic 0th entry not counted
// in the table dimensions, and also have indices only for the used sub-range.
// This function will compute the starting point and size of a lookup table,
// in addition to whether it should have the magic 0th entry for the given
// list of child nodes.
//
// |children| is the list of child nodes to examine. Results are written
// through the out-parameters; judging by their names, |has_0th_entry| reports
// whether the magic 0th entry is needed, |first_item| is the starting point
// of the table, and |list_size| is its size (TODO confirm against the body).
void ComputeLookupStrategyDetails(const std::vector<DicNode*>& children,
                                  bool* has_0th_entry,
                                  int* first_item,
                                  int* list_size) { … }

// Recursively fills in the storage strategy for |node| and each of its
// children. This must be done before actually serializing because the storage
// mode will depend on the size of the children.
// NOTE(review): the meaning of the size_t return value is not documented;
// presumably it is the serialized size of |node| including its children --
// confirm against the body.
size_t ComputeTrieStorage(DicNode* node) { … }

// Serializes |node| to |output|. Intended for use when the node is
// DicNode::LEAF*.
void SerializeLeaf(const DicNode* node, std::string* output) { … }

// Serializes |node| to |output|. Intended for use when the node is
// DicNode::LIST*.
void SerializeList(const DicNode* node, std::string* output) { … }

// Serializes |node| to |output|. Intended for use when the node is
// DicNode::LOOKUP*.
void SerializeLookup(const DicNode* node, std::string* output) { … }

// Recursively serializes |node| and all of its children to |output|.
// As the ComputeTrieStorage comment states, the storage strategy must be
// filled in before serializing.
void SerializeTrie(const DicNode* node, std::string* output) { … }

// Serializes the given list of |strings| to |output| with 0 bytes separating
// them. The end of the list will be marked by a double-0.
void SerializeStringListNullTerm(const std::vector<std::string>& strings,
                                 std::string* output) { … }

// Serializes the list of string pairs in |repl| to |output|.
// NOTE(review): the byte layout is not documented here; presumably it follows
// the BDICT format described in bdict.h -- confirm and document.
void SerializeReplacements(
    const std::vector< std::pair<std::string, std::string> >& repl,
    std::string* output) { … }

}  // namespace

// Default constructor.
BDictWriter::BDictWriter() : … { … }

// Destructor.
// NOTE(review): presumably releases the in-memory trie -- confirm.
BDictWriter::~BDictWriter() { … }

// Supplies the word list for this writer.
// NOTE(review): presumably (re)builds the in-memory trie from |words| via
// BuildTrie -- confirm against the body.
void BDictWriter::SetWords(const WordList& words) { … }

// Returns the dictionary as a std::string.
// NOTE(review): presumably the complete serialized BDICT contents (header,
// affix data and trie) -- confirm against the body.
std::string BDictWriter::GetBDict() const { … }

// Serializes this writer's affix-related data to |output|.
// NOTE(review): which members are written, and in what order, is not visible
// here -- confirm against the body.
void BDictWriter::SerializeAff(std::string* output) const { … }

}  // namespace hunspell
// chromium/third_party/hunspell/google/bdict_writer.cc