// Copyright 2011 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "third_party/hunspell/google/bdict_writer.h" #include <stddef.h> #include <stdint.h> #include "base/check.h" #include "base/containers/span.h" #include "base/strings/stringprintf.h" #include "third_party/hunspell/google/bdict.h" namespace hunspell { // Represents one node of the word trie in memory. This does not have to be very // efficient since it is only used when building. class DicNode { … }; namespace { void SerializeTrie(const DicNode* node, std::string* output); // Returns true if the nth character in the given word is |ch|. Will return // false when there is no nth character. Note that this will also match an // implicit NUL at the end of the string. bool NthCharacterIs(const std::string& word, size_t n, char ch) { … } // Recursively builds the trie data structure for the range in the |words| list // in [begin, end). It is assumed that all words in that range will have the // same |node_depth - 2| characters at the beginning. This node will key off of // the |node_depth - 1| character, with a special case for the root. // // |node_depth| is how deep this node is in the trie (and corresponds to how // many letters of the word we will skip). The root level will have // |node_depth| of 0. // // The given |node| will be filled with the data. The return value is the // index into the |words| vector of the next word to process. It will be // equal to |end| when all words have been consumed. size_t BuildTrie(const BDictWriter::WordList& words, size_t begin, size_t end, size_t node_depth, DicNode* node) { … } // Lookup tables are complicated. They can have a magic 0th entry not counted // in the table dimensions, and also have indices only for the used sub-range. 
// This function will compute the starting point and size of a lookup table, // in addition to whether it should have the magic 0th entry for the given // list of child nodes. void ComputeLookupStrategyDetails(const std::vector<DicNode*>& children, bool* has_0th_entry, int* first_item, int* list_size) { … } // Recursively fills in the storage strategy for this node and each of its // children. This must be done before actually serializing because the storage // mode will depend on the size of the children. size_t ComputeTrieStorage(DicNode* node) { … } // Serializes the given node when it is DicNode::LEAF* to the output. void SerializeLeaf(const DicNode* node, std::string* output) { … } // Serializes the given node when it is DicNode::LIST* to the output. void SerializeList(const DicNode* node, std::string* output) { … } // Serializes the given node when it is DicNode::LOOKUP* to the output. void SerializeLookup(const DicNode* node, std::string* output) { … } // Recursively serializes this node and all of its children to the output. void SerializeTrie(const DicNode* node, std::string* output) { … } // Serializes the given list of strings with 0 bytes separating them. The end // will be marked by a double-0. void SerializeStringListNullTerm(const std::vector<std::string>& strings, std::string* output) { … } // NOTE(review): undocumented here; presumably serializes the |repl| pairs // into |output| -- confirm the exact encoding against the body. void SerializeReplacements( const std::vector< std::pair<std::string, std::string> >& repl, std::string* output) { … } } // namespace BDictWriter::BDictWriter() : … { … } BDictWriter::~BDictWriter() { … } void BDictWriter::SetWords(const WordList& words) { … } std::string BDictWriter::GetBDict() const { … } void BDictWriter::SerializeAff(std::string* output) const { … } } // namespace hunspell