// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
***************************************************************************
* Copyright (C) 2008-2013, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
*
* uspoof_impl.h
*
*    Implementation header for spoof detection
*
*/

#ifndef USPOOFIM_H
#define USPOOFIM_H

#include "uassert.h"
#include "unicode/utypes.h"
#include "unicode/uspoof.h"
#include "unicode/uscript.h"
#include "unicode/udata.h"
#include "udataswp.h"
#include "utrie2.h"

#if !UCONFIG_NO_NORMALIZATION

#ifdef __cplusplus

#include "capi_helper.h"
#include "umutex.h"

U_NAMESPACE_BEGIN

// The maximum length (in UTF-16 UChars) of the skeleton replacement string resulting from
//   a single input code point.  This is a function of the unicode.org data.
#define USPOOF_MAX_SKELETON_EXPANSION …

// The default stack buffer size for copies or conversions or normalizations
// of input strings being checked.  (Used in multiple places.)
#define USPOOF_STACK_BUFFER_SIZE …

// Magic number for sanity checking spoof data.
// Passed below as the third IcuCApiHelper template argument of class SpoofImpl.
#define USPOOF_MAGIC …

// Magic number for sanity checking spoof checkers.
// Passed below as the third IcuCApiHelper template argument of class CheckResult.
#define USPOOF_CHECK_MAGIC …

// Forward declarations.
class ScriptSet;
class SpoofData;
struct SpoofDataHeader;
class ConfusableDataUtils;

/**
  *  Class SpoofImpl corresponds directly to the plain C API opaque type
  *  USpoofChecker.  One can be cast to the other.
  */
class SpoofImpl : public UObject,
        public IcuCApiHelper<USpoofChecker, SpoofImpl, USPOOF_MAGIC> { … };

/**
 *  Class CheckResult corresponds directly to the plain C API opaque type
 *  USpoofCheckResult.  One can be cast to the other.
 */
class CheckResult : public UObject,
        public IcuCApiHelper<USpoofCheckResult, CheckResult, USPOOF_CHECK_MAGIC> { … };

//
//  Confusable Mappings Data Structures, version 2.0
//
//    For the confusable data, we are essentially implementing a map,
//       key:    a code point
//       value:  a string.  Most commonly one char in length, but can be more.
//
//    The keys are stored as a sorted array of 32 bit ints.
//             bits 0-23    a code point value
//             bits 24-31   length of value string, in UChars (between 1 and 256 UChars).
//        The key table is sorted in ascending code point order.  (not on the
//        32 bit int value, the flag bits do not participate in the sorting.)
//
//        Lookup is done by means of a binary search in the key table.
//
//    The corresponding values are kept in a parallel array of 16 bit ints.
//        If the value string is of length 1, it is literally in the value array.
//        For longer strings, the value array contains an index into the strings table.
//
//    String Table:
//       The strings table contains all of the value strings (those of length two or greater)
//       concatenated together into one long char16_t (UTF-16) array.
//
//       There is no nul character or other mark between adjacent strings.
//
//----------------------------------------------------------------------------
//
//  Changes from format version 1 to format version 2:
//      1) Removal of the whole-script confusable data tables.
//      2) Removal of the SL/SA/ML/MA and multi-table flags in the key bitmask.
//      3) Expansion of string length value in the key bitmask from 2 bits to 8 bits.
//      4) Removal of the string lengths table since 8 bits is sufficient for the
//         lengths of all entries in confusables.txt.

// Internal functions for manipulating confusable data table keys
#define USPOOF_CONFUSABLE_DATA_FORMAT_VERSION …
class ConfusableDataUtils { … };

//-------------------------------------------------------------------------------------
//
//  SpoofData
//
//    A small class that wraps the raw (usually memory mapped) spoof data.
//    Serves two primary functions:
//      1.  Convenience.  Contains real pointers to the data, to avoid dealing with
//          the offsets in the raw data.
//      2.  Reference counting.  When a spoof checker is cloned, the raw data is shared
//          and must be retained until all checkers using the data are closed.
//    Nothing in this class includes state that is specific to any particular
//    USpoofChecker object.
//
//---------------------------------------------------------------------------------------
class SpoofData: public UMemory { … };

//---------------------------------------------------------------------------------------
//
//  Raw Binary Data Formats, as loaded from the ICU data file,
//    or as built by the builder.
//
//---------------------------------------------------------------------------------------
struct SpoofDataHeader { … };

U_NAMESPACE_END

#endif /* __cplusplus */

/**
  * Endianness swap function for binary spoof data.
  *
  * NOTE(review): the parameter list looks like the data-swapper callback
  * convention of udataswp.h; confirm parameter and return-value semantics there.
  * @internal
  */
U_CAPI int32_t U_EXPORT2
uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
            UErrorCode *status);

#endif /* !UCONFIG_NO_NORMALIZATION */

#endif  /* USPOOFIM_H */