// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 2008-2015, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: uspoof_conf.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2009Jan05 (refactoring earlier files) * created by: Andy Heninger * * Internal classes for compiling confusable data into its binary (runtime) form. */ #include "unicode/utypes.h" #include "unicode/uspoof.h" #if !UCONFIG_NO_REGULAR_EXPRESSIONS #if !UCONFIG_NO_NORMALIZATION #include "unicode/unorm.h" #include "unicode/uregex.h" #include "unicode/ustring.h" #include "cmemory.h" #include "uspoof_impl.h" #include "uhash.h" #include "uvector.h" #include "uassert.h" #include "uarrsort.h" #include "uspoof_conf.h" U_NAMESPACE_USE //--------------------------------------------------------------------- // // buildConfusableData Compile the source confusable data, as defined by // the Unicode data file confusables.txt, into the binary // structures used by the confusable detector. // // The binary structures are described in uspoof_impl.h // // 1. Parse the data, making a hash table mapping from a UChar32 to a String. // // 2. Sort all of the strings encountered by length, since they will need to // be stored in that order in the final string table. // TODO: Sorting these strings by length is no longer needed since the removal of // the string lengths table. This logic can be removed to save processing time // when building confusables data. // // 3. Build a list of keys (UChar32s) from the four mapping tables. Sort the // list because that will be the ordering of our runtime table. // // 4. Generate the run time string table. 
This is generated before the key & value // tables because we need the string indexes when building those tables. // // 5. Build the run-time key and value tables. These are parallel tables, and are built // at the same time. // SPUString::SPUString(LocalPointer<UnicodeString> s) { … } SPUString::~SPUString() { … } SPUStringPool::SPUStringPool(UErrorCode &status) : … { … } SPUStringPool::~SPUStringPool() { … } int32_t SPUStringPool::size() { … } SPUString *SPUStringPool::getByIndex(int32_t index) { … } // Comparison function for ordering strings in the string pool. // Compare by length first, then, within a group of the same length, // by code point order. // Conforms to the type signature for a USortComparator in uvector.h static int32_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) { … } void SPUStringPool::sort(UErrorCode &status) { … } SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) { … } ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) : … { … } ConfusabledataBuilder::~ConfusabledataBuilder() { … } void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables, int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) { … } void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen, UErrorCode &status) { … } // // outputData The confusable data has been compiled and stored in intermediate // collections and strings. Copy it from there to the final flat // binary array. // // Note that as each section is added to the output data, the // expand (reserveSpace()) function will likely relocate it in memory. // Be careful with pointers. // void ConfusabledataBuilder::outputData(UErrorCode &status) { … } #endif /* !UCONFIG_NO_NORMALIZATION */ #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS