// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 2007-2012, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: bmpset.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2007jan29 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #include "unicode/uniset.h" #include "unicode/utf8.h" #include "unicode/utf16.h" #include "cmemory.h" #include "bmpset.h" #include "uassert.h" U_NAMESPACE_BEGIN /* * Constructor taking a list of int32_t values (parentList) and its length (parentListLength). * NOTE(review): the initializer list and body are elided in this view; presumably parentList is the inversion list of the owning UnicodeSet - confirm against bmpset.h. */ BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) : … { … } /* * Constructor taking an existing BMPSet (otherBMPSet) plus a new list (newParentList, newParentListLength). * NOTE(review): the initializer list and body are elided in this view; presumably this reuses the precomputed tables of otherBMPSet together with the new list - confirm. */ BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) : … { … } /* Destructor. NOTE(review): body elided in this view, so any cleanup it performs cannot be confirmed from here. */ BMPSet::~BMPSet() { … } /* * Set bits in a bit rectangle in "vertical" bit organization. * start<limit<=0x800 * table is declared as 64 uint32_t words, i.e. 64*32 = 0x800 bits, which matches the limit<=0x800 bound. * NOTE(review): presumably start and limit describe a half-open bit range - confirm against the body. */ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) { … } /* * Non-const member function with no parameters and no return value. * The comment on overrideIllegal() indicates that initBits() modifies table values that the constructor reset to 0. * NOTE(review): body elided in this view; presumably it fills the lookup tables from the parent list with the help of set32x64Bits() - confirm. */ void BMPSet::initBits() { … } /* * Override some bits and bytes to the result of contains(FFFD) * for faster validity checking at runtime. * No need to set 0 values where they were reset to 0 in the constructor * and not modified by initBits(). * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF) * Need to set 0 values for surrogates D800..DFFF. */ void BMPSet::overrideIllegal() { … } /* * Const member that takes a code point c and two int32_t bounds, lo and hi, and returns an int32_t. * NOTE(review): body elided in this view; presumably a binary search for c in the parent list limited to indexes lo..hi that returns a list index - confirm, including whether hi is inclusive. */ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const { … } /* * Const member that returns a UBool for code point c; overrideIllegal() is documented as using the result of contains(FFFD). * NOTE(review): body elided in this view; presumably the result tells whether c is in the set - confirm, including the behavior for c outside 0..0x10ffff. */ UBool BMPSet::contains(UChar32 c) const { … } /* * Check for sufficient length for trail unit for each surrogate pair. * Handle single surrogates as surrogate code points as usual in ICU. * s and limit are char16_t pointers and the result is a char16_t pointer as well. * NOTE(review): presumably s and limit delimit the input text and the result points to the end of the span selected by spanCondition - confirm. */ const char16_t * BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const { … } /* Symmetrical with span(). 
*/ /* * Same parameter types and return type as span(). * NOTE(review): body elided in this view; presumably scans from limit toward s and returns the start of the span - confirm. */ const char16_t * BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const { … } /* * Precheck for sufficient trail bytes at end of string only once per span. * Check validity. * Takes a uint8_t pointer s plus an int32_t length and returns a uint8_t pointer. * NOTE(review): body elided in this view; presumably the result points just past the spanned bytes - confirm. */ const uint8_t * BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { … } /* * While going backwards through UTF-8 optimize only for ASCII. * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not * possible to tell from the last byte in a multi-byte sequence how many * preceding bytes there should be. Therefore, going backwards through UTF-8 * is much harder than going forward. * Unlike spanUTF8(), this returns an int32_t rather than a pointer. * NOTE(review): body elided in this view; presumably the returned value is the length of the prefix of s that remains before the span - confirm. */ int32_t BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { … } U_NAMESPACE_END