#if !STRINGLIB_IS_UNICODE
# error "codecs.h is specific to Unicode"
#endif
#include "pycore_bitutils.h"
/* ASCII_CHAR_MASK is selected according to the width of size_t: one
   definition for 8-byte size_t, another for 4-byte size_t.  Any other
   width is rejected at compile time. */
#if (SIZEOF_SIZE_T == 8)
#define ASCII_CHAR_MASK …
#elif (SIZEOF_SIZE_T == 4)
#define ASCII_CHAR_MASK …
#else
# error C 'size_t' size should be either 4 or 8!
#endif
/* NOTE(review): presumably tests whether `ch` is a UTF-8 continuation byte;
   confirm against the macro definition. */
#define IS_CONTINUATION_BYTE(ch) …
/* NOTE(review): judging by name and signature, this decodes UTF-8 bytes
   starting at *inptr (bounded by `end`) into `dest`, tracking the output
   index through *outpos, and reports its result as a Py_UCS4 value.
   Confirm the exact contract (return value meaning, how *inptr and *outpos
   are advanced) against the body and its callers. */
Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(utf8_decode)(const char **inptr, const char *end,
STRINGLIB_CHAR *dest,
Py_ssize_t *outpos)
{ … }
#undef ASCII_CHAR_MASK
/* NOTE(review): judging by name and signature, this encodes the `size`
   characters at `data` (belonging to `unicode`) as UTF-8 through `writer`,
   applying `error_handler` / `errors` to characters that cannot be encoded,
   and returns a char pointer into the output.  Confirm the exact contract
   (meaning of the returned pointer, failure signalling) against the body
   and its callers. */
Py_LOCAL_INLINE(char *)
STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
PyObject *unicode,
const STRINGLIB_CHAR *data,
Py_ssize_t size,
_Py_error_handler error_handler,
const char *errors)
{ … }
/* UCS2_REPEAT_MASK is selected according to the width of long: one
   definition for 8-byte long, another for 4-byte long.  Any other width is
   rejected at compile time. */
#if SIZEOF_LONG == 8
#define UCS2_REPEAT_MASK …
#elif SIZEOF_LONG == 4
#define UCS2_REPEAT_MASK …
#else
# error C 'long' size should be either 4 or 8!
#endif
/* FAST_CHAR_MASK has one definition when STRINGLIB_SIZEOF_CHAR is 1 and a
   different one for every other character width. */
#if STRINGLIB_SIZEOF_CHAR == 1
#define FAST_CHAR_MASK …
#else
#define FAST_CHAR_MASK …
#endif
/* NOTE(review): STRIPPED_MASK and SWAB are helper macros that are
   #undef'ed again after the UTF-16 decoder; presumably they are only meant
   for that function -- confirm against their definitions. */
#define STRIPPED_MASK …
#define SWAB …
/* NOTE(review): judging by name and signature, this decodes UTF-16 data
   starting at *inptr (bounded by `e`) into `dest`, tracking the output
   index through *outpos; `native_ordering` presumably tells whether the
   input is in the host byte order.  Confirm the exact contract (return
   value meaning, pointer advancement) against the body and its callers. */
Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
int native_ordering)
{ … }
#undef UCS2_REPEAT_MASK
#undef FAST_CHAR_MASK
#undef STRIPPED_MASK
#undef SWAB
#if STRINGLIB_MAX_CHAR >= 0x80
/* Encode `len` characters starting at `in` as 16-bit code units stored
   through *outptr.

   If `native_ordering` is nonzero the code units are stored as-is;
   otherwise every unit is passed through the local SWAB2 macro first
   (presumably a byte swap -- confirm against its definition).

   *outptr is always updated to point just past the last unit written.

   Return value:
     - `len` when all characters were encoded;
     - for character widths above one byte, the index of the first
       character in the range 0xd800..0xdfff, at which encoding stops.

   When STRINGLIB_MAX_CHAR >= 0x10000, characters >= 0x10000 are written
   as two units (Py_UNICODE_HIGH_SURROGATE then Py_UNICODE_LOW_SURROGATE).

   No check of the output buffer capacity is made here; presumably the
   caller provides a large enough buffer -- confirm against callers. */
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
Py_ssize_t len,
unsigned short **outptr,
int native_ordering)
{
unsigned short *out = *outptr;
const STRINGLIB_CHAR *end = in + len;
#if STRINGLIB_SIZEOF_CHAR == 1
/* One-byte characters: no surrogate check is performed in this variant,
   so every character is simply widened to 16 bits. */
if (native_ordering) {
/* Copy four characters per iteration over the part of the input whose
   length is rounded down by _Py_SIZE_ROUND_DOWN(len, 4). */
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
while (in < unrolled_end) {
out[0] = in[0];
out[1] = in[1];
out[2] = in[2];
out[3] = in[3];
in += 4; out += 4;
}
/* Copy the remaining characters one at a time. */
while (in < end) {
*out++ = *in++;
}
} else {
/* Non-native order: SWAB2 is defined only for this branch and removed
   again below. */
#define SWAB2 …
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
while (in < unrolled_end) {
out[0] = SWAB2(in[0]);
out[1] = SWAB2(in[1]);
out[2] = SWAB2(in[2]);
out[3] = SWAB2(in[3]);
in += 4; out += 4;
}
/* Remaining characters, one at a time. */
while (in < end) {
Py_UCS4 ch = *in++;
*out++ = SWAB2((Py_UCS2)ch);
}
#undef SWAB2
}
*outptr = out;
return len;
#else
/* Wider characters: surrogates must be rejected, and (for the widest
   character type) characters >= 0x10000 need two code units. */
if (native_ordering) {
#if STRINGLIB_MAX_CHAR < 0x10000
/* Four-at-a-time fast path, only compiled when no character can need
   two code units. */
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
while (in < unrolled_end) {
/* A character in 0xd800..0xdfff XORed with 0xd800 has all bits of
   0xf800 cleared, which forces the whole AND to zero.  The result can
   also be zero without any surrogate being present; either way we leave
   the fast path and let the loop below examine characters one by one. */
if (((in[0] ^ 0xd800) &
(in[1] ^ 0xd800) &
(in[2] ^ 0xd800) &
(in[3] ^ 0xd800) & 0xf800) == 0)
break;
out[0] = in[0];
out[1] = in[1];
out[2] = in[2];
out[3] = in[3];
in += 4; out += 4;
}
#endif
/* Exact per-character loop; `in` is advanced before the checks, which the
   failure path below accounts for. */
while (in < end) {
Py_UCS4 ch;
ch = *in++;
if (ch < 0xd800)
*out++ = ch;
/* 0xd800 <= ch < 0xe000: stop encoding at this character. */
else if (ch < 0xe000)
goto fail;
#if STRINGLIB_MAX_CHAR >= 0x10000
/* Needs two 16-bit units. */
else if (ch >= 0x10000) {
out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
out[1] = Py_UNICODE_LOW_SURROGATE(ch);
out += 2;
}
#endif
else
*out++ = ch;
}
} else {
/* Non-native order: same structure as above, with every stored unit
   passed through SWAB2 (defined only for this branch). */
#define SWAB2 …
#if STRINGLIB_MAX_CHAR < 0x10000
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
while (in < unrolled_end) {
/* Same conservative surrogate pre-check as in the native-order branch. */
if (((in[0] ^ 0xd800) &
(in[1] ^ 0xd800) &
(in[2] ^ 0xd800) &
(in[3] ^ 0xd800) & 0xf800) == 0)
break;
out[0] = SWAB2(in[0]);
out[1] = SWAB2(in[1]);
out[2] = SWAB2(in[2]);
out[3] = SWAB2(in[3]);
in += 4; out += 4;
}
#endif
while (in < end) {
Py_UCS4 ch = *in++;
if (ch < 0xd800)
*out++ = SWAB2((Py_UCS2)ch);
/* 0xd800 <= ch < 0xe000: stop encoding at this character. */
else if (ch < 0xe000)
goto fail;
#if STRINGLIB_MAX_CHAR >= 0x10000
/* Split into two units first, then transform each unit separately. */
else if (ch >= 0x10000) {
Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
out[0] = SWAB2(ch1);
out[1] = SWAB2(ch2);
out += 2;
}
#endif
else
*out++ = SWAB2((Py_UCS2)ch);
}
#undef SWAB2
}
*outptr = out;
return len;
fail:
/* `in` already points one past the offending character, so
   (end - in + 1) counts that character plus everything after it;
   subtracting from len yields the offending character's index. */
*outptr = out;
return len - (end - in + 1);
#endif
}
/* Widen `ch` to 32 bits and return it with its bytes in reversed order.

   The narrow variants shift only the bytes a character of that width can
   occupy (assuming STRINGLIB_SIZEOF_CHAR reflects the real width of
   STRINGLIB_CHAR); the remaining result bytes are zero. */
static inline uint32_t
STRINGLIB(SWAB4)(STRINGLIB_CHAR ch)
{
#if STRINGLIB_SIZEOF_CHAR == 1
    /* The single input byte becomes the most significant byte. */
    return (uint32_t)ch << 24;
#elif STRINGLIB_SIZEOF_CHAR == 2
    /* Byte 0 moves to byte 3, byte 1 moves to byte 2. */
    const uint32_t wide = ch;
    const uint32_t byte0 = wide & 0x00FFu;
    const uint32_t byte1 = wide & 0xFF00u;
    return (byte0 << 24) | (byte1 << 8);
#else
    /* Full-width character: delegate to the generic 32-bit byte swap. */
    const uint32_t wide = ch;
    return _Py_bswap32(wide);
#endif
}
/* Encode `len` characters starting at `in` as 32-bit code units stored
   through *outptr.

   With a nonzero `native_ordering` each character is stored unchanged;
   otherwise it is first passed through STRINGLIB(SWAB4).  For character
   widths above one byte, a surrogate (as reported by
   Py_UNICODE_IS_SURROGATE) stops the encoding.

   *outptr is always updated to point just past the last unit written.
   Returns `len` when every character was encoded, otherwise the index of
   the first surrogate.  The output buffer capacity is not checked here. */
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
                        Py_ssize_t len,
                        uint32_t **outptr,
                        int native_ordering)
{
    const STRINGLIB_CHAR *src = in;
    const STRINGLIB_CHAR *const stop = in + len;
    /* End of the prefix that is processed four characters at a time. */
    const STRINGLIB_CHAR *const quad_stop = in + _Py_SIZE_ROUND_DOWN(len, 4);
    uint32_t *dst = *outptr;

    if (native_ordering) {
        for (; src < quad_stop; src += 4, dst += 4) {
#if STRINGLIB_SIZEOF_CHAR > 1
            /* Conservative pre-check: a surrogate among the four characters
               always makes this expression zero.  It may also be zero
               without one, in which case the exact loop below simply takes
               over earlier than strictly needed. */
            if (((src[0] ^ 0xd800) &
                 (src[1] ^ 0xd800) &
                 (src[2] ^ 0xd800) &
                 (src[3] ^ 0xd800) & 0xf800) == 0)
            {
                break;
            }
#endif
            dst[0] = src[0];
            dst[1] = src[1];
            dst[2] = src[2];
            dst[3] = src[3];
        }
        /* Exact, one-character-at-a-time loop for whatever is left. */
        for (; src < stop; src++) {
            const Py_UCS4 ch = *src;
#if STRINGLIB_SIZEOF_CHAR > 1
            if (Py_UNICODE_IS_SURROGATE(ch)) {
                /* Report how far we got and where the surrogate sits. */
                *outptr = dst;
                return (Py_ssize_t)(src - in);
            }
#endif
            *dst++ = ch;
        }
    }
    else {
        for (; src < quad_stop; src += 4, dst += 4) {
#if STRINGLIB_SIZEOF_CHAR > 1
            /* Same conservative surrogate pre-check as above. */
            if (((src[0] ^ 0xd800) &
                 (src[1] ^ 0xd800) &
                 (src[2] ^ 0xd800) &
                 (src[3] ^ 0xd800) & 0xf800) == 0)
            {
                break;
            }
#endif
            dst[0] = STRINGLIB(SWAB4)(src[0]);
            dst[1] = STRINGLIB(SWAB4)(src[1]);
            dst[2] = STRINGLIB(SWAB4)(src[2]);
            dst[3] = STRINGLIB(SWAB4)(src[3]);
        }
        for (; src < stop; src++) {
            const Py_UCS4 ch = *src;
#if STRINGLIB_SIZEOF_CHAR > 1
            if (Py_UNICODE_IS_SURROGATE(ch)) {
                *outptr = dst;
                return (Py_ssize_t)(src - in);
            }
#endif
            *dst++ = STRINGLIB(SWAB4)(ch);
        }
    }

    *outptr = dst;
    return len;
}
#endif