#include "Python.h"
#include "pycore_abstract.h"
#include "pycore_bytes_methods.h"
#include "pycore_bytesobject.h"
#include "pycore_ceval.h"
#include "pycore_codecs.h"
#include "pycore_critical_section.h"
#include "pycore_format.h"
#include "pycore_initconfig.h"
#include "pycore_interp.h"
#include "pycore_long.h"
#include "pycore_object.h"
#include "pycore_pathconfig.h"
#include "pycore_pyerrors.h"
#include "pycore_pylifecycle.h"
#include "pycore_pystate.h"
#include "pycore_ucnhash.h"
#include "pycore_unicodeobject.h"
#include "pycore_unicodeobject_generated.h"
#include "stringlib/eq.h"
#include <stddef.h>
#ifdef MS_WINDOWS
#include <windows.h>
#endif
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
# include "pycore_fileutils.h"
#endif
#define MAX_UNICODE …
#ifdef Py_DEBUG
#define _PyUnicode_CHECK …
#else
#define _PyUnicode_CHECK(op) …
#endif
#define _PyUnicode_UTF8(op) …
#define PyUnicode_UTF8(op) …
#define _PyUnicode_UTF8_LENGTH(op) …
#define PyUnicode_UTF8_LENGTH(op) …
#define _PyUnicode_LENGTH(op) …
#define _PyUnicode_STATE(op) …
#define _PyUnicode_HASH(op) …
#define _PyUnicode_KIND(op) …
#define _PyUnicode_GET_LENGTH(op) …
#define _PyUnicode_DATA_ANY(op) …
#define _PyUnicode_SHARE_UTF8(op) …
#define _PyUnicode_HAS_UTF8_MEMORY(op) …
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) …
#define LATIN1 …
#ifdef MS_WINDOWS
#define OVERALLOCATE_FACTOR …
#else
#define OVERALLOCATE_FACTOR …
#endif
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
static inline void
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors);
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
static int
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
#ifdef Py_DEBUG
static inline int unicode_is_finalizing(void);
static int unicode_is_singleton(PyObject *unicode);
#endif
static inline PyObject* unicode_get_empty(void)
{ … }
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{ … }
#define INTERNED_STRINGS …
Py_ssize_t
_PyUnicode_InternedSize(void)
{ … }
Py_ssize_t
_PyUnicode_InternedSize_Immortal(void)
{ … }
static Py_hash_t unicode_hash(PyObject *);
static int unicode_compare_eq(PyObject *, PyObject *);
static Py_uhash_t
hashtable_unicode_hash(const void *key)
{ … }
static int
hashtable_unicode_compare(const void *key1, const void *key2)
{ … }
static int
init_interned_dict(PyInterpreterState *interp)
{ … }
static void
clear_interned_dict(PyInterpreterState *interp)
{ … }
static PyStatus
init_global_interned_strings(PyInterpreterState *interp)
{ … }
static void clear_global_interned_strings(void)
{ … }
#define _Py_RETURN_UNICODE_EMPTY() …
static inline void
unicode_fill(int kind, void *data, Py_UCS4 value,
Py_ssize_t start, Py_ssize_t length)
{ … }
const unsigned char _Py_ascii_whitespace[] = …;
static PyObject* get_latin1_char(unsigned char ch);
static int unicode_modifiable(PyObject *unicode);
static PyObject *
_PyUnicode_FromUCS1(const Py_UCS1 *s, Py_ssize_t size);
static PyObject *
_PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size);
static PyObject *
_PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size);
static PyObject *
unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler,const char *encoding, const char *reason,
PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
static void
raise_encode_exception(PyObject **exceptionObject,
const char *encoding,
PyObject *unicode,
Py_ssize_t startpos, Py_ssize_t endpos,
const char *reason);
static const unsigned char ascii_linebreak[] = …;
static int convert_uc(PyObject *obj, void *addr);
struct encoding_map;
#include "clinic/unicodeobject.c.h"
_Py_error_handler
_Py_GetErrorHandler(const char *errors)
{ … }
static _Py_error_handler
get_error_handler_wide(const wchar_t *errors)
{ … }
static inline int
unicode_check_encoding_errors(const char *encoding, const char *errors)
{ … }
int
_PyUnicode_CheckConsistency(PyObject *op, int check_content)
{ … }
static PyObject*
unicode_result(PyObject *unicode)
{ … }
static PyObject*
unicode_result_unchanged(PyObject *unicode)
{ … }
static char*
backslashreplace(_PyBytesWriter *writer, char *str,
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
{ … }
static char*
xmlcharrefreplace(_PyBytesWriter *writer, char *str,
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
{ … }
#if LONG_BIT >= 128
#define BLOOM_WIDTH …
#elif LONG_BIT >= 64
#define BLOOM_WIDTH …
#elif LONG_BIT >= 32
#define BLOOM_WIDTH …
#else
#error "LONG_BIT is smaller than 32"
#endif
#define BLOOM_MASK …
static BLOOM_MASK bloom_linebreak = …;
#define BLOOM(mask, ch) …
#define BLOOM_LINEBREAK(ch) …
static inline BLOOM_MASK
make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
{ … }
static int
ensure_unicode(PyObject *obj)
{ … }
#define STRINGLIB_GET_EMPTY …
#include "stringlib/asciilib.h"
#include "stringlib/fastsearch.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#include "stringlib/ucs1lib.h"
#include "stringlib/fastsearch.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#include "stringlib/ucs2lib.h"
#include "stringlib/fastsearch.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#include "stringlib/ucs4lib.h"
#include "stringlib/fastsearch.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#undef STRINGLIB_GET_EMPTY
static inline Py_ssize_t
findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch,
int direction)
{ … }
#ifdef Py_DEBUG
/* Debug-only helper: overwrite the part of a string's character buffer that
   lies between character index `old_length` and the string's current length
   with 0xff bytes. Does nothing when the current length is not greater than
   `old_length`. */
static void
unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
{
    Py_UCS1 *bytes = PyUnicode_1BYTE_DATA(unicode);
    int char_width = PyUnicode_KIND(unicode);
    Py_ssize_t new_length = _PyUnicode_LENGTH(unicode);

    if (new_length > old_length) {
        /* The kind value is used as the per-character byte width to turn
           character counts into byte offsets and byte counts. */
        memset(bytes + old_length * char_width,
               0xff,
               (new_length - old_length) * char_width);
    }
}
#endif
static PyObject*
resize_compact(PyObject *unicode, Py_ssize_t length)
{ … }
static int
resize_inplace(PyObject *unicode, Py_ssize_t length)
{ … }
static PyObject*
resize_copy(PyObject *unicode, Py_ssize_t length)
{ … }
static const char*
unicode_kind_name(PyObject *unicode)
{ … }
#ifdef Py_DEBUG
/* Debug-only accessor: treat an untyped pointer as a PyObject and return
   PyUnicode_UTF8() of it. */
const char *
_PyUnicode_utf8(void *unicode_raw)
{
    PyObject *str = _PyObject_CAST(unicode_raw);

    return PyUnicode_UTF8(str);
}
/* Debug-only accessor: treat an untyped pointer as a PyObject and return
   _PyUnicode_COMPACT_DATA() of it. */
const void *
_PyUnicode_compact_data(void *unicode_raw)
{
    PyObject *str = _PyObject_CAST(unicode_raw);

    return _PyUnicode_COMPACT_DATA(str);
}
/* Debug-only accessor: print several layout facts about a string object to
   stdout (its address, the compact / compact-ASCII flags, the addresses just
   past the ASCII and compact headers, and the compact data pointer), then
   return PyUnicode_DATA() of it. */
const void *
_PyUnicode_data(void *unicode_raw)
{
    PyObject *str = _PyObject_CAST(unicode_raw);

    printf("obj %p\n", (void*)str);
    printf("compact %d\n", PyUnicode_IS_COMPACT(str));
    printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(str));

    /* Address of the first byte following each possible object header. */
    void *past_ascii_header = (void*)(_PyASCIIObject_CAST(str) + 1);
    printf("ascii op %p\n", past_ascii_header);
    void *past_compact_header = (void*)(_PyCompactUnicodeObject_CAST(str) + 1);
    printf("compact op %p\n", past_compact_header);

    printf("compact data %p\n", _PyUnicode_COMPACT_DATA(str));
    return PyUnicode_DATA(str);
}
/* Debug-only helper: print a one-line summary of a string object to stdout.

   The line contains the kind name (from unicode_kind_name()), the length,
   for non-ASCII strings the cached UTF-8 pointer and its length, and the
   address of the character data.

   The data address is chosen from the object's state bits: right after the
   ASCII header for compact ASCII strings, right after the compact header for
   other compact strings, and the `data.any` pointer otherwise. */
void
_PyUnicode_Dump(PyObject *op)
{
    PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
    PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
    PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
    const void *data;

    if (ascii->state.compact)
    {
        if (ascii->state.ascii)
            data = (ascii + 1);
        else
            data = (compact + 1);
    }
    else
        data = unicode->data.any;

    /* The length fields are signed (Py_ssize_t), so they are printed with
       %zd; %zu is only valid for size_t arguments. */
    printf("%s: len=%zd, ", unicode_kind_name(op), ascii->length);

    if (!ascii->state.ascii) {
        printf("utf8=%p (%zd)", (void *)compact->utf8, compact->utf8_length);
    }
    printf(", data=%p\n", data);
}
#endif
PyObject *
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{ … }
static int
unicode_check_modifiable(PyObject *unicode)
{ … }
static int
_copy_characters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many, int check_maxchar)
{ … }
void
_PyUnicode_FastCopyCharacters(
PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
{ … }
Py_ssize_t
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many)
{ … }
static int
find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
Py_UCS4 *maxchar, Py_ssize_t *num_surrogates)
{ … }
static void
unicode_dealloc(PyObject *unicode)
{ … }
#ifdef Py_DEBUG
/* Debug-only predicate: return 1 if `unicode` is the shared empty string or
   the shared one-character string stored in the LATIN1() table for its
   character, and 0 otherwise. */
static int
unicode_is_singleton(PyObject *unicode)
{
    if (unicode == &_Py_STR(empty)) {
        return 1;
    }

    /* Only strings of length exactly 1 can match a LATIN1() entry. */
    if (_PyASCIIObject_CAST(unicode)->length != 1) {
        return 0;
    }

    Py_UCS4 only_char = PyUnicode_READ_CHAR(unicode, 0);
    if (only_char >= 256) {
        return 0;
    }
    return (LATIN1(only_char) == unicode) ? 1 : 0;
}
#endif
static int
unicode_modifiable(PyObject *unicode)
{ … }
static int
unicode_resize(PyObject **p_unicode, Py_ssize_t length)
{ … }
int
PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
{ … }
static void
unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
const char *str, Py_ssize_t len)
{ … }
static PyObject*
get_latin1_char(Py_UCS1 ch)
{ … }
static PyObject*
unicode_char(Py_UCS4 ch)
{ … }
static inline void
unicode_write_widechar(int kind, void *data,
const wchar_t *u, Py_ssize_t size,
Py_ssize_t num_surrogates)
{ … }
PyObject *
PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
{ … }
int
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *pub_writer,
const wchar_t *str,
Py_ssize_t size)
{ … }
PyObject *
PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
{ … }
PyObject *
PyUnicode_FromString(const char *u)
{ … }
PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{ … }
static void
unicode_clear_identifiers(struct _Py_unicode_state *state)
{ … }
PyObject*
_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
{ … }
static Py_UCS4
kind_maxchar_limit(int kind)
{ … }
static PyObject*
_PyUnicode_FromUCS1(const Py_UCS1* u, Py_ssize_t size)
{ … }
static PyObject*
_PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
{ … }
static PyObject*
_PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
{ … }
int
PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
Py_UCS4 *str,
Py_ssize_t size)
{ … }
PyObject*
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
{ … }
Py_UCS4
_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
{ … }
static void
unicode_adjust_maxchar(PyObject **p_unicode)
{ … }
PyObject*
_PyUnicode_Copy(PyObject *unicode)
{ … }
static void*
unicode_askind(int skind, void const *data, Py_ssize_t len, int kind)
{ … }
static Py_UCS4*
as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
int copy_null)
{ … }
Py_UCS4*
PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
int copy_null)
{ … }
Py_UCS4*
PyUnicode_AsUCS4Copy(PyObject *string)
{ … }
#define MAX_INTMAX_CHARS …
static int
unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t width, Py_ssize_t precision, int flags)
{ … }
static int
unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
Py_ssize_t width, Py_ssize_t precision, int flags)
{ … }
static int
unicode_fromformat_write_wcstr(_PyUnicodeWriter *writer, const wchar_t *str,
Py_ssize_t width, Py_ssize_t precision, int flags)
{ … }
#define F_LONG …
#define F_LONGLONG …
#define F_SIZE …
#define F_PTRDIFF …
#define F_INTMAX …
static const char * const formats[] = …;
static const char * const formats_o[] = …;
static const char * const formats_u[] = …;
static const char * const formats_x[] = …;
static const char * const formats_X[] = …;
static const char*
unicode_fromformat_arg(_PyUnicodeWriter *writer,
const char *f, va_list *vargs)
{ … }
static int
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
{ … }
PyObject *
PyUnicode_FromFormatV(const char *format, va_list vargs)
{ … }
PyObject *
PyUnicode_FromFormat(const char *format, ...)
{ … }
int
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
{ … }
static Py_ssize_t
unicode_get_widechar_size(PyObject *unicode)
{ … }
static void
unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
{ … }
#ifdef HAVE_WCHAR_H
Py_ssize_t
PyUnicode_AsWideChar(PyObject *unicode,
wchar_t *w,
Py_ssize_t size)
{ … }
wchar_t*
PyUnicode_AsWideCharString(PyObject *unicode,
Py_ssize_t *size)
{ … }
#endif
int
_PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr)
{ … }
int
_PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr)
{ … }
PyObject *
PyUnicode_FromOrdinal(int ordinal)
{ … }
PyObject *
PyUnicode_FromObject(PyObject *obj)
{ … }
PyObject *
PyUnicode_FromEncodedObject(PyObject *obj,
const char *encoding,
const char *errors)
{ … }
int
_Py_normalize_encoding(const char *encoding,
char *lower,
size_t lower_len)
{ … }
PyObject *
PyUnicode_Decode(const char *s,
Py_ssize_t size,
const char *encoding,
const char *errors)
{ … }
PyObject *
PyUnicode_AsDecodedObject(PyObject *unicode,
const char *encoding,
const char *errors)
{ … }
PyObject *
PyUnicode_AsDecodedUnicode(PyObject *unicode,
const char *encoding,
const char *errors)
{ … }
PyObject *
PyUnicode_AsEncodedObject(PyObject *unicode,
const char *encoding,
const char *errors)
{ … }
static PyObject *
unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
int current_locale)
{ … }
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{ … }
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{ … }
PyObject *
PyUnicode_AsEncodedString(PyObject *unicode,
const char *encoding,
const char *errors)
{ … }
PyObject *
PyUnicode_AsEncodedUnicode(PyObject *unicode,
const char *encoding,
const char *errors)
{ … }
static PyObject*
unicode_decode_locale(const char *str, Py_ssize_t len,
_Py_error_handler errors, int current_locale)
{ … }
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{ … }
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{ … }
PyObject*
PyUnicode_DecodeFSDefault(const char *s) { … }
PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{ … }
int
PyUnicode_FSConverter(PyObject* arg, void* addr)
{ … }
int
PyUnicode_FSDecoder(PyObject* arg, void* addr)
{ … }
static int unicode_fill_utf8(PyObject *unicode);
const char *
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
{ … }
const char *
PyUnicode_AsUTF8(PyObject *unicode)
{ … }
const char *
_PyUnicode_AsUTF8NoNUL(PyObject *unicode)
{ … }
PyAPI_FUNC(Py_ssize_t)
PyUnicode_GetSize(PyObject *unicode)
{ … }
Py_ssize_t
PyUnicode_GetLength(PyObject *unicode)
{ … }
Py_UCS4
PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
{ … }
int
PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch)
{ … }
const char *
PyUnicode_GetDefaultEncoding(void)
{ … }
static void
make_decode_exception(PyObject **exceptionObject,
const char *encoding,
const char *input, Py_ssize_t length,
Py_ssize_t startpos, Py_ssize_t endpos,
const char *reason)
{ … }
#ifdef MS_WINDOWS
/* Set the logical size of a wchar_t buffer to `newsize`, reallocating only
   when the buffer has to grow.

   buf      in/out: the buffer pointer; replaced only after a successful
            reallocation.
   size     in/out: the buffer length in wchar_t units; set to `newsize` on
            success and left unchanged on failure.

   Returns 0 on success, or -1 with MemoryError set when growing fails. */
static int
widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
{
    if (newsize <= *size) {
        /* Shrinking (or no change) only updates the recorded length. */
        *size = newsize;
        return 0;
    }

    /* Resize through a temporary so *buf is only replaced on success. */
    wchar_t *grown = *buf;
    PyMem_Resize(grown, wchar_t, newsize);
    if (grown == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    *buf = grown;
    *size = newsize;
    return 0;
}
/* Call the decoding error handler for an undecodable input range and copy the
   replacement text it returns into a wchar_t output buffer.

   errors, errorHandler   handler name and cached handler object; the handler
                          is looked up on first use and stored in *errorHandler.
   encoding, reason       passed to make_decode_exception().
   input, inend           in/out: start and end of the input bytes; both are
                          reloaded from the exception object after the call.
   startinpos, endinpos   offsets of the failing range; *endinpos is set to the
                          position returned by the handler.
   exceptionObject        in/out: exception object passed to
                          make_decode_exception() and to the handler.
   inptr                  out: set to *input + the position returned by the
                          handler.
   buf, bufsize, outpos   output buffer, its size and the current write offset;
                          the buffer is grown if needed and *outpos is advanced
                          past the replacement.

   Returns 0 on success and -1 on failure. */
static int
unicode_decode_call_errorhandler_wchar(
const char *errors, PyObject **errorHandler,
const char *encoding, const char *reason,
const char **input, const char **inend, Py_ssize_t *startinpos,
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
{
/* PyArg_ParseTuple format; the text after "Un;" (offset 3) doubles as the
   TypeError message used below. */
static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
PyObject *restuple = NULL;
PyObject *repunicode = NULL;
Py_ssize_t outsize;
Py_ssize_t insize;
Py_ssize_t requiredsize;
Py_ssize_t newpos;
PyObject *inputobj = NULL;
Py_ssize_t repwlen;
/* Look the handler up lazily and cache it for later calls. */
if (*errorHandler == NULL) {
*errorHandler = PyCodec_LookupError(errors);
if (*errorHandler == NULL)
goto onError;
}
make_decode_exception(exceptionObject,
encoding,
*input, *inend - *input,
*startinpos, *endinpos,
reason);
if (*exceptionObject == NULL)
goto onError;
restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
if (restuple == NULL)
goto onError;
if (!PyTuple_Check(restuple)) {
PyErr_SetString(PyExc_TypeError, &argparse[3]);
goto onError;
}
if (!PyArg_ParseTuple(restuple, argparse, &repunicode, &newpos))
goto onError;
/* Reload the input pointers from the exception object
   (presumably because the handler may have replaced the input -- confirm). */
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
if (!inputobj)
goto onError;
*input = PyBytes_AS_STRING(inputobj);
insize = PyBytes_GET_SIZE(inputobj);
*inend = *input + insize;
/* NOTE(review): *input keeps pointing into inputobj after this DECREF; this
   relies on the exception object holding its own reference -- confirm. */
Py_DECREF(inputobj);
/* A negative position is taken relative to the end of the input. */
if (newpos<0)
newpos = insize+newpos;
if (newpos<0 || newpos>insize) {
PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
goto onError;
}
/* Size query: with a NULL buffer the documented return value includes the
   terminating null, which is dropped by the decrement below. */
repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0);
if (repwlen < 0)
goto onError;
repwlen--;
/* Required size = what is already written + the replacement + the input
   remaining after newpos, with an overflow check before each addition. */
requiredsize = *outpos;
if (requiredsize > PY_SSIZE_T_MAX - repwlen)
goto overflow;
requiredsize += repwlen;
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
outsize = *bufsize;
if (requiredsize > outsize) {
/* Grow to at least twice the current size when that does not overflow. */
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize;
if (widechar_resize(buf, bufsize, requiredsize) < 0) {
goto onError;
}
}
/* Copy the replacement text and advance the output and input positions. */
PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen);
*outpos += repwlen;
*endinpos = newpos;
*inptr = *input + newpos;
Py_DECREF(restuple);
return 0;
/* The overflow path sets OverflowError and then falls through to onError. */
overflow:
PyErr_SetString(PyExc_OverflowError,
"decoded result is too long for a Python string");
onError:
Py_XDECREF(restuple);
return -1;
}
#endif
static int
unicode_decode_call_errorhandler_writer(
const char *errors, PyObject **errorHandler,
const char *encoding, const char *reason,
const char **input, const char **inend, Py_ssize_t *startinpos,
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
_PyUnicodeWriter *writer )
{ … }
#define IS_BASE64 …
#define FROM_BASE64 …
#define TO_BASE64 …
#define DECODE_DIRECT …
static
char utf7_category[128] = …;
#define ENCODE_DIRECT …
PyObject *
PyUnicode_DecodeUTF7(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
PyObject *
PyUnicode_DecodeUTF7Stateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{ … }
PyObject *
_PyUnicode_EncodeUTF7(PyObject *str,
int base64SetO,
int base64WhiteSpace,
const char *errors)
{ … }
#undef IS_BASE64
#undef FROM_BASE64
#undef TO_BASE64
#undef DECODE_DIRECT
#undef ENCODE_DIRECT
PyObject *
PyUnicode_DecodeUTF8(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
#include "stringlib/asciilib.h"
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
#include "stringlib/ucs1lib.h"
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
#include "stringlib/ucs2lib.h"
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
#include "stringlib/ucs4lib.h"
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
#if (SIZEOF_SIZE_T == 8)
#define ASCII_CHAR_MASK …
#elif (SIZEOF_SIZE_T == 4)
#define ASCII_CHAR_MASK …
#else
# error C 'size_t' size should be either 4 or 8!
#endif
static Py_ssize_t
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
{ … }
static int
unicode_decode_utf8_impl(_PyUnicodeWriter *writer,
const char *starts, const char *s, const char *end,
_Py_error_handler error_handler,
const char *errors,
Py_ssize_t *consumed)
{ … }
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed)
{ … }
static int
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed)
{ … }
PyObject *
PyUnicode_DecodeUTF8Stateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{ … }
int
_Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
const char **reason, _Py_error_handler errors)
{ … }
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
size_t *wlen)
{ … }
int
_Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
const char **reason, int raw_malloc, _Py_error_handler errors)
{ … }
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors)
{ … }
static int
unicode_fill_utf8(PyObject *unicode)
{ … }
PyObject *
_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
{ … }
PyObject *
PyUnicode_AsUTF8String(PyObject *unicode)
{ … }
PyObject *
PyUnicode_DecodeUTF32(const char *s,
Py_ssize_t size,
const char *errors,
int *byteorder)
{ … }
PyObject *
PyUnicode_DecodeUTF32Stateful(const char *s,
Py_ssize_t size,
const char *errors,
int *byteorder,
Py_ssize_t *consumed)
{ … }
PyObject *
_PyUnicode_EncodeUTF32(PyObject *str,
const char *errors,
int byteorder)
{ … }
PyObject *
PyUnicode_AsUTF32String(PyObject *unicode)
{ … }
PyObject *
PyUnicode_DecodeUTF16(const char *s,
Py_ssize_t size,
const char *errors,
int *byteorder)
{ … }
PyObject *
PyUnicode_DecodeUTF16Stateful(const char *s,
Py_ssize_t size,
const char *errors,
int *byteorder,
Py_ssize_t *consumed)
{ … }
PyObject *
_PyUnicode_EncodeUTF16(PyObject *str,
const char *errors,
int byteorder)
{ … }
PyObject *
PyUnicode_AsUTF16String(PyObject *unicode)
{ … }
_PyUnicode_Name_CAPI *
_PyUnicode_GetNameCAPI(void)
{ … }
PyObject *
_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed,
const char **first_invalid_escape)
{ … }
PyObject *
_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{ … }
PyObject *
PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
PyObject *
PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
{ … }
PyObject *
_PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{ … }
PyObject *
PyUnicode_DecodeRawUnicodeEscape(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
PyObject *
PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
{ … }
PyObject *
PyUnicode_DecodeLatin1(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
static void
make_encode_exception(PyObject **exceptionObject,
const char *encoding,
PyObject *unicode,
Py_ssize_t startpos, Py_ssize_t endpos,
const char *reason)
{ … }
static void
raise_encode_exception(PyObject **exceptionObject,
const char *encoding,
PyObject *unicode,
Py_ssize_t startpos, Py_ssize_t endpos,
const char *reason)
{ … }
static PyObject *
unicode_encode_call_errorhandler(const char *errors,
PyObject **errorHandler,
const char *encoding, const char *reason,
PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{ … }
static PyObject *
unicode_encode_ucs1(PyObject *unicode,
const char *errors,
const Py_UCS4 limit)
{ … }
PyObject *
_PyUnicode_AsLatin1String(PyObject *unicode, const char *errors)
{ … }
PyObject*
PyUnicode_AsLatin1String(PyObject *unicode)
{ … }
PyObject *
PyUnicode_DecodeASCII(const char *s,
Py_ssize_t size,
const char *errors)
{ … }
PyObject *
_PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
{ … }
PyObject *
PyUnicode_AsASCIIString(PyObject *unicode)
{ … }
#ifdef MS_WINDOWS
#if SIZEOF_INT < SIZEOF_SIZE_T
#define NEED_RETRY
#endif
#define DECODING_CHUNK_SIZE …
#ifndef WC_ERR_INVALID_CHARS
#define WC_ERR_INVALID_CHARS …
#endif
/* Return a C string naming a Windows code page.

   CP_ACP, CP_UTF7 and CP_UTF8 map to the string literals "mbcs", "CP_UTF7"
   and "CP_UTF8", and *obj is set to NULL. Any other code page is formatted
   as "cp<number>" into a new bytes object stored in *obj; the returned
   pointer is that object's buffer, so the caller must keep *obj alive while
   using the name and release it afterwards.

   Returns NULL (with *obj == NULL) if the bytes object cannot be created. */
static const char*
code_page_name(UINT code_page, PyObject **obj)
{
    *obj = NULL;

    switch (code_page) {
    case CP_ACP:
        return "mbcs";
    case CP_UTF7:
        return "CP_UTF7";
    case CP_UTF8:
        return "CP_UTF8";
    default:
        break;
    }

    PyObject *name_bytes = PyBytes_FromFormat("cp%u", code_page);
    if (name_bytes == NULL) {
        return NULL;
    }
    *obj = name_bytes;
    return PyBytes_AS_STRING(name_bytes);
}
/* Return the MultiByteToWideChar() flags to use for `code_page`:
   0 for CP_UTF7 and MB_ERR_INVALID_CHARS for every other code page. */
static DWORD
decode_code_page_flags(UINT code_page)
{
    return (code_page == CP_UTF7) ? 0 : MB_ERR_INVALID_CHARS;
}
/* Decode `insize` bytes at `in` from `code_page` in a single strict
   MultiByteToWideChar() conversion and append the result to the wchar_t
   buffer *buf, whose current length is *bufsize.

   The conversion is first attempted with MB_ERR_INVALID_CHARS; if the API
   rejects the flags with ERROR_INVALID_FLAGS it is retried with flags 0.

   Returns:
     insize  on success (*buf / *bufsize are extended by the decoded text),
     -2      when the failure is ERROR_NO_UNICODE_TRANSLATION (no exception
             is set by this function),
     -1      on any other failure, with an exception set. */
static int
decode_code_page_strict(UINT code_page,
                        wchar_t **buf,
                        Py_ssize_t *bufsize,
                        const char *in,
                        int insize)
{
    DWORD flags = MB_ERR_INVALID_CHARS;
    wchar_t *out;
    /* MultiByteToWideChar() returns an int, so keep the count signed; the
       "<= 0" failure tests below then mean what they say. */
    int outsize;
    Py_ssize_t n;

    /* First pass: ask only for the size of the result. */
    assert(insize > 0);
    while ((outsize = MultiByteToWideChar(code_page, flags,
                                          in, insize, NULL, 0)) <= 0)
    {
        if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
            goto error;
        }
        /* The flags were rejected: retry once without them. */
        flags = 0;
    }

    /* Extend the buffer, guarding the size addition against overflow in the
       same way as the other code page helpers in this file. */
    n = *bufsize;
    if (outsize > PY_SSIZE_T_MAX - n) {
        PyErr_NoMemory();
        return -1;
    }
    if (widechar_resize(buf, bufsize, n + outsize) < 0) {
        return -1;
    }
    out = *buf + n;

    /* Second pass: do the conversion into the newly reserved tail. */
    outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
    if (outsize <= 0)
        goto error;
    return insize;

error:
    if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
        return -2;
    PyErr_SetFromWindowsErr(0);
    return -1;
}
/* Decode the `size` bytes at `in` from `code_page` one character at a time,
   passing undecodable input to the `errors` handler, and append the result to
   the wchar_t buffer *buf (current length *bufsize).

   If `errors` is NULL or "strict" and `final` is true, no decoding is done:
   a decode exception is created and raised via PyCodec_StrictErrors() and -1
   is returned.

   Otherwise the buffer is extended up front by two wchar_t per input byte and
   shrunk to the used length at the end.

   Returns the number of input bytes consumed on success, or -1 on failure.
   When `final` is false, an undecodable sequence that reaches the end of the
   input is left unconsumed. */
static int
decode_code_page_errors(UINT code_page,
wchar_t **buf,
Py_ssize_t *bufsize,
const char *in, const int size,
const char *errors, int final)
{
const char *startin = in;
const char *endin = in + size;
DWORD flags = MB_ERR_INVALID_CHARS;
const char *reason = "No mapping for the Unicode character exists "
"in the target code page.";
/* Scratch output for one decode step: room for two wchar_t
   (presumably one surrogate pair -- confirm). */
wchar_t buffer[2], *out;
int insize;
Py_ssize_t outsize;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *encoding_obj = NULL;
const char *encoding;
DWORD err;
int ret = -1;
assert(size > 0);
encoding = code_page_name(code_page, &encoding_obj);
if (encoding == NULL)
return -1;
if ((errors == NULL || strcmp(errors, "strict") == 0) && final) {
/* Strict handling on final input: raise the decode error and stop. */
make_decode_exception(&exc, encoding, in, size, 0, 0, reason);
if (exc != NULL) {
PyCodec_StrictErrors(exc);
Py_CLEAR(exc);
}
goto error;
}
/* Reserve space for the worst case (two wchar_t per input byte), checking the
   size computation for overflow first. */
Py_ssize_t n = *bufsize;
if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
PyErr_NoMemory();
goto error;
}
if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
goto error;
}
out = *buf + n;
while (in < endin)
{
/* Try progressively longer byte sequences starting at `in` (1 to 4 bytes,
   bounded by the end of the input) until one decodes. */
insize = 1;
do
{
outsize = MultiByteToWideChar(code_page, flags,
in, insize,
buffer, Py_ARRAY_LENGTH(buffer));
if (outsize > 0)
break;
err = GetLastError();
if (err == ERROR_INVALID_FLAGS && flags) {
/* The flags were rejected: clear them and go on to the loop test
   with the same insize. */
flags = 0;
continue;
}
if (err != ERROR_NO_UNICODE_TRANSLATION
&& err != ERROR_INSUFFICIENT_BUFFER)
{
PyErr_SetFromWindowsErr(err);
goto error;
}
insize++;
}
while (insize <= 4 && (in + insize) <= endin);
if (outsize <= 0) {
Py_ssize_t startinpos, endinpos, outpos;
/* Non-final input: stop here and leave the undecodable tail that reaches
   the end of the input unconsumed. */
if (in + insize >= endin && !final)
break;
/* Report a one-byte error range at the current position to the handler.
   The handler call may update startin/endin/in and may reallocate *buf, so
   `out` is recomputed from the returned output position. */
startinpos = in - startin;
endinpos = startinpos + 1;
outpos = out - *buf;
if (unicode_decode_call_errorhandler_wchar(
errors, &errorHandler,
encoding, reason,
&startin, &endin, &startinpos, &endinpos, &exc, &in,
buf, bufsize, &outpos))
{
goto error;
}
out = *buf + outpos;
}
else {
/* Decoded successfully: consume the bytes and append the output. */
in += insize;
memcpy(out, buffer, outsize * sizeof(wchar_t));
out += outsize;
}
}
/* Shrink the recorded buffer length to what was actually written. */
assert(out - *buf <= *bufsize);
*bufsize = out - *buf;
ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
/* Shared cleanup for both the success and failure paths. */
error:
Py_XDECREF(encoding_obj);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return ret;
}
/* Decode `size` bytes at `s` from the Windows code page `code_page` into a
   new str object.

   errors     error handler name, passed to decode_code_page_errors() when the
              strict conversion reports undecodable input.
   consumed   if not NULL, receives the number of input bytes consumed; the
              last chunk is then decoded as non-final input.

   Raises ValueError for a negative code page and reports a bad internal call
   for a negative size. Returns a new reference, or NULL on failure. */
static PyObject *
decode_code_page_stateful(int code_page,
const char *s, Py_ssize_t size,
const char *errors, Py_ssize_t *consumed)
{
wchar_t *buf = NULL;
Py_ssize_t bufsize = 0;
int chunk_size, final, converted, done;
if (code_page < 0) {
PyErr_SetString(PyExc_ValueError, "invalid code page number");
return NULL;
}
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
if (consumed)
*consumed = 0;
do
{
/* When NEED_RETRY is defined, input longer than DECODING_CHUNK_SIZE is
   decoded in several non-final chunks; otherwise everything left is decoded
   in one last chunk. */
#ifdef NEED_RETRY
if (size > DECODING_CHUNK_SIZE) {
chunk_size = DECODING_CHUNK_SIZE;
final = 0;
done = 0;
}
else
#endif
{
chunk_size = (int)size;
/* The last chunk is final only when the caller did not ask for `consumed`. */
final = (consumed == NULL);
done = 1;
}
/* Nothing left to decode: finish with what is buffered, or return the empty
   string if nothing was decoded at all. */
if (chunk_size == 0 && done) {
if (buf != NULL)
break;
_Py_RETURN_UNICODE_EMPTY();
}
/* Try the one-shot strict conversion first; -2 means it hit undecodable
   input, so fall back to the character-by-character path. */
converted = decode_code_page_strict(code_page, &buf, &bufsize,
s, chunk_size);
if (converted == -2)
converted = decode_code_page_errors(code_page, &buf, &bufsize,
s, chunk_size,
errors, final);
assert(converted != 0 || done);
if (converted < 0) {
PyMem_Free(buf);
return NULL;
}
if (consumed)
*consumed += converted;
/* Advance past the bytes that were consumed by this chunk. */
s += converted;
size -= converted;
} while (!done);
/* Build the result from the accumulated wide characters and free the buffer. */
PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
PyMem_Free(buf);
return v;
}
/* Public entry point: decode `size` bytes at `s` from the Windows code page
   `code_page`. All arguments are forwarded unchanged to
   decode_code_page_stateful(), whose result is returned. */
PyObject *
PyUnicode_DecodeCodePageStateful(int code_page,
                                 const char *s,
                                 Py_ssize_t size,
                                 const char *errors,
                                 Py_ssize_t *consumed)
{
    PyObject *decoded = decode_code_page_stateful(code_page, s, size,
                                                  errors, consumed);
    return decoded;
}
/* Public entry point: decode `size` bytes at `s` using the CP_ACP code page.
   `errors` and `consumed` are forwarded unchanged to
   decode_code_page_stateful(), whose result is returned. */
PyObject *
PyUnicode_DecodeMBCSStateful(const char *s,
                             Py_ssize_t size,
                             const char *errors,
                             Py_ssize_t *consumed)
{
    PyObject *decoded = decode_code_page_stateful(CP_ACP, s, size,
                                                  errors, consumed);
    return decoded;
}
/* Public entry point: decode `size` bytes at `s` using the CP_ACP code page
   without reporting a consumed count (the `consumed` argument of the
   underlying decoder is NULL). */
PyObject *
PyUnicode_DecodeMBCS(const char *s,
                     Py_ssize_t size,
                     const char *errors)
{
    /* Same call the stateful MBCS entry point makes when given a NULL
       `consumed` pointer. */
    return decode_code_page_stateful(CP_ACP, s, size, errors, NULL);
}
/* Return the WideCharToMultiByte() flags to use for `code_page`:

     CP_UTF8          -> WC_ERR_INVALID_CHARS
     CP_UTF7          -> 0
     anything else    -> 0 when `errors` is "replace",
                         WC_NO_BEST_FIT_CHARS otherwise (including NULL). */
static DWORD
encode_code_page_flags(UINT code_page, const char *errors)
{
    switch (code_page) {
    case CP_UTF8:
        return WC_ERR_INVALID_CHARS;
    case CP_UTF7:
        return 0;
    default:
        break;
    }

    if (errors == NULL || strcmp(errors, "replace") != 0) {
        return WC_NO_BEST_FIT_CHARS;
    }
    return 0;
}
/* Encode the `len` characters of `unicode` starting at `offset` to
   `code_page` in a single strict WideCharToMultiByte() conversion and append
   the bytes to *outbytes (created here if it is NULL).

   The flags are obtained from encode_code_page_flags() with a NULL error
   handler name; the `errors` argument is not used by this function.

   Returns:
      0  on success,
     -2  when the conversion used a default character or failed with
         ERROR_NO_UNICODE_TRANSLATION,
     -1  on any other failure. */
static int
encode_code_page_strict(UINT code_page, PyObject **outbytes,
                        PyObject *unicode, Py_ssize_t offset, int len,
                        const char* errors)
{
    const DWORD flags = encode_code_page_flags(code_page, NULL);
    BOOL default_used = FALSE;
    BOOL *p_default_used;
    PyObject *slice;
    wchar_t *wide;
    Py_ssize_t wide_len;
    int nbytes;
    char *dest;
    int status = -1;

    assert(len > 0);

    /* No "used default char" out-parameter is passed for UTF-8 / UTF-7. */
    if (code_page == CP_UTF8 || code_page == CP_UTF7) {
        p_default_used = NULL;
    }
    else {
        p_default_used = &default_used;
    }

    /* Take the slice to encode and convert just that slice to wchar_t. */
    slice = PyUnicode_Substring(unicode, offset, offset+len);
    if (slice == NULL) {
        return -1;
    }
    wide = PyUnicode_AsWideCharString(slice, &wide_len);
    Py_DECREF(slice);
    if (wide == NULL) {
        return -1;
    }
    assert(wide_len <= INT_MAX);

    /* First pass: ask only for the size of the result. */
    nbytes = WideCharToMultiByte(code_page, flags,
                                 wide, (int)wide_len,
                                 NULL, 0,
                                 NULL, p_default_used);
    if (nbytes <= 0) {
        goto win_error;
    }
    if (p_default_used != NULL && *p_default_used) {
        status = -2;
        goto cleanup;
    }

    if (*outbytes != NULL) {
        /* Extend the existing bytes object, guarding the size addition. */
        const Py_ssize_t used = PyBytes_Size(*outbytes);
        if (nbytes > PY_SSIZE_T_MAX - used) {
            PyErr_NoMemory();
            goto cleanup;
        }
        if (_PyBytes_Resize(outbytes, used + nbytes) < 0) {
            goto cleanup;
        }
        dest = PyBytes_AS_STRING(*outbytes) + used;
    }
    else {
        /* Create the bytes object. */
        *outbytes = PyBytes_FromStringAndSize(NULL, nbytes);
        if (*outbytes == NULL) {
            goto cleanup;
        }
        dest = PyBytes_AS_STRING(*outbytes);
    }

    /* Second pass: do the conversion into the reserved space. */
    nbytes = WideCharToMultiByte(code_page, flags,
                                 wide, (int)wide_len,
                                 dest, nbytes,
                                 NULL, p_default_used);
    if (nbytes <= 0) {
        goto win_error;
    }
    if (p_default_used != NULL && *p_default_used) {
        status = -2;
        goto cleanup;
    }
    status = 0;
    goto cleanup;

win_error:
    /* Inspect the Windows error before freeing anything. */
    if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
        status = -2;
    }
    else {
        PyErr_SetFromWindowsErr(0);
    }

cleanup:
    PyMem_Free(wide);
    return status;
}
/* Encode unicode[unicode_offset : unicode_offset + insize] with the Windows
   code page 'code_page', one character at a time, invoking the 'errors'
   handler for every character WideCharToMultiByte() cannot encode.

   *outbytes is created when it is NULL, otherwise the encoded bytes are
   appended to it.  On success the bytes object is trimmed to the bytes
   actually written.

   With errors == NULL or "strict" no encoding is attempted: a
   UnicodeEncodeError is raised through PyCodec_StrictErrors() and -1 is
   returned.

   Returns 0 on success, -1 on failure.  The caller remains responsible for
   releasing *outbytes on failure. */
static int
encode_code_page_errors(UINT code_page, PyObject **outbytes,
                        PyObject *unicode, Py_ssize_t unicode_offset,
                        Py_ssize_t insize, const char* errors)
{
    const DWORD flags = encode_code_page_flags(code_page, errors);
    Py_ssize_t pos = unicode_offset;
    Py_ssize_t endin = unicode_offset + insize;
    const char *reason = "invalid character";
    /* Scratch space for one encoded character.  Its length is also the
       number of output bytes budgeted for every input character. */
    char buffer[4];
    BOOL usedDefaultChar = FALSE, *pusedDefaultChar;
    Py_ssize_t outsize;
    char *out;
    PyObject *errorHandler = NULL;
    PyObject *exc = NULL;
    PyObject *encoding_obj = NULL;
    const char *encoding;
    Py_ssize_t newpos, newoutsize;
    PyObject *rep;
    int ret = -1;

    assert(insize > 0);

    encoding = code_page_name(code_page, &encoding_obj);
    if (encoding == NULL)
        return -1;

    if (errors == NULL || strcmp(errors, "strict") == 0) {
        /* Strict mode: there is nothing to recover, just raise. */
        make_encode_exception(&exc, encoding, unicode, 0, 0, reason);
        if (exc != NULL) {
            PyCodec_StrictErrors(exc);
            Py_DECREF(exc);
        }
        Py_XDECREF(encoding_obj);
        return -1;
    }

    /* The "used default char" output is not requested for UTF-8 and UTF-7. */
    if (code_page != CP_UTF8 && code_page != CP_UTF7)
        pusedDefaultChar = &usedDefaultChar;
    else
        pusedDefaultChar = NULL;

    /* Reserve sizeof(buffer) bytes per input character, guarding the
       multiplication against overflow. */
    if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) {
        PyErr_NoMemory();
        goto error;
    }
    outsize = insize * Py_ARRAY_LENGTH(buffer);

    if (*outbytes == NULL) {
        /* Create the bytes object. */
        *outbytes = PyBytes_FromStringAndSize(NULL, outsize);
        if (*outbytes == NULL)
            goto error;
        out = PyBytes_AS_STRING(*outbytes);
    }
    else {
        /* Extend the existing bytes object and write after its content. */
        Py_ssize_t n = PyBytes_Size(*outbytes);
        if (n > PY_SSIZE_T_MAX - outsize) {
            PyErr_NoMemory();
            goto error;
        }
        if (_PyBytes_Resize(outbytes, n + outsize) < 0)
            goto error;
        out = PyBytes_AS_STRING(*outbytes) + n;
    }

    /* Encode the string character by character. */
    while (pos < endin)
    {
        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, pos);
        wchar_t chars[2];
        int charsize;
        if (ch < 0x10000) {
            chars[0] = (wchar_t)ch;
            charsize = 1;
        }
        else {
            /* Non-BMP character: pass it as a surrogate pair. */
            chars[0] = Py_UNICODE_HIGH_SURROGATE(ch);
            chars[1] = Py_UNICODE_LOW_SURROGATE(ch);
            charsize = 2;
        }

        outsize = WideCharToMultiByte(code_page, flags,
                                      chars, charsize,
                                      buffer, Py_ARRAY_LENGTH(buffer),
                                      NULL, pusedDefaultChar);
        if (outsize > 0) {
            if (pusedDefaultChar == NULL || !(*pusedDefaultChar))
            {
                /* Encoded cleanly: copy the bytes and move on. */
                pos++;
                memcpy(out, buffer, outsize);
                out += outsize;
                continue;
            }
            /* A default character was substituted: treat as an error. */
        }
        else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) {
            PyErr_SetFromWindowsErr(0);
            goto error;
        }

        rep = unicode_encode_call_errorhandler(
                  errors, &errorHandler, encoding, reason,
                  unicode, &exc,
                  pos, pos + 1, &newpos);
        if (rep == NULL)
            goto error;

        /* Work out how much extra capacity is needed before jumping to
           newpos.  Characters the handler skips (newpos > pos) hand back one
           byte each, which is deliberately conservative.  Characters it
           rewinds over (newpos < pos) will be encoded a second time and may
           each need a full sizeof(buffer) bytes again, so reserve that much;
           reserving a single byte per rewound character could let the
           memcpy() above run past the end of the bytes object. */
        Py_ssize_t morebytes = pos - newpos;
        if (morebytes > 0) {
            if (morebytes > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
                Py_DECREF(rep);
                PyErr_NoMemory();
                goto error;
            }
            morebytes *= (Py_ssize_t)Py_ARRAY_LENGTH(buffer);
        }

        /* The replacement is either bytes (copied verbatim) or str (which
           must be pure ASCII, one output byte per character). */
        const int rep_is_bytes = PyBytes_Check(rep);
        outsize = rep_is_bytes ? PyBytes_GET_SIZE(rep)
                               : PyUnicode_GET_LENGTH(rep);
        if (morebytes > PY_SSIZE_T_MAX - outsize) {
            Py_DECREF(rep);
            PyErr_NoMemory();
            goto error;
        }
        morebytes += outsize;
        if (morebytes > 0) {
            /* Remember the write offset: resizing may move the buffer. */
            const Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
            const Py_ssize_t cursize = PyBytes_GET_SIZE(*outbytes);
            if (morebytes > PY_SSIZE_T_MAX - cursize) {
                Py_DECREF(rep);
                PyErr_NoMemory();
                goto error;
            }
            newoutsize = cursize + morebytes;
            if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
                Py_DECREF(rep);
                goto error;
            }
            out = PyBytes_AS_STRING(*outbytes) + offset;
        }

        if (rep_is_bytes) {
            memcpy(out, PyBytes_AS_STRING(rep), outsize);
            out += outsize;
        }
        else {
            Py_ssize_t i;
            const int kind = PyUnicode_KIND(rep);
            const void *data = PyUnicode_DATA(rep);
            for (i = 0; i < outsize; i++) {
                Py_UCS4 repch = PyUnicode_READ(kind, data, i);
                if (repch > 127) {
                    raise_encode_exception(&exc,
                        encoding, unicode,
                        pos, pos + 1,
                        "unable to encode error handler result to ASCII");
                    Py_DECREF(rep);
                    goto error;
                }
                *out = (unsigned char)repch;
                out++;
            }
        }
        pos = newpos;
        Py_DECREF(rep);
    }
    /* Write a terminating NUL byte, then trim to the bytes written. */
    *out = 0;
    outsize = out - PyBytes_AS_STRING(*outbytes);
    assert(outsize <= PyBytes_GET_SIZE(*outbytes));
    if (_PyBytes_Resize(outbytes, outsize) < 0)
        goto error;
    ret = 0;

error:
    /* Shared cleanup for both the success and the failure path. */
    Py_XDECREF(encoding_obj);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return ret;
}
/* Encode 'unicode' with the Windows code page 'code_page' and return a new
   bytes object, or NULL with an exception set.

   The string is processed in chunks (at most DECODING_CHUNK_SIZE characters
   each when NEED_RETRY is defined, otherwise in one piece).  Every chunk is
   first handed to encode_code_page_strict(); when that reports -2 the chunk
   is encoded again by encode_code_page_errors(), which applies the 'errors'
   handler. */
static PyObject *
encode_code_page(int code_page,
                 PyObject *unicode,
                 const char *errors)
{
    PyObject *result = NULL;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    Py_ssize_t remaining = PyUnicode_GET_LENGTH(unicode);

    if (code_page < 0) {
        PyErr_SetString(PyExc_ValueError, "invalid code page number");
        return NULL;
    }

    if (remaining == 0) {
        return PyBytes_FromStringAndSize(NULL, 0);
    }

    Py_ssize_t start = 0;
    for (;;) {
        int is_last;
        int chunk;
#ifdef NEED_RETRY
        /* Cap the chunk so that its length always fits in an int. */
        is_last = (remaining <= DECODING_CHUNK_SIZE);
        chunk = is_last ? (int)remaining : DECODING_CHUNK_SIZE;
#else
        is_last = 1;
        chunk = (int)remaining;
#endif

        int status = encode_code_page_strict(code_page, &result,
                                             unicode, start, chunk,
                                             errors);
        if (status == -2) {
            /* The fast path could not encode this chunk: redo it with the
               per-character encoder. */
            status = encode_code_page_errors(code_page, &result,
                                             unicode, start,
                                             chunk, errors);
        }
        if (status < 0) {
            Py_XDECREF(result);
            return NULL;
        }

        start += chunk;
        remaining -= chunk;
        if (is_last) {
            break;
        }
    }

    return result;
}
/* Public entry point: forwards its arguments unchanged to
   encode_code_page() and returns that function's result. */
PyObject *
PyUnicode_EncodeCodePage(int code_page,
                         PyObject *unicode,
                         const char *errors)
{
    PyObject *encoded = encode_code_page(code_page, unicode, errors);
    return encoded;
}
/* Encode 'unicode' with the CP_ACP code page, passing NULL as the error
   handler name.  Returns whatever PyUnicode_EncodeCodePage() returns. */
PyObject *
PyUnicode_AsMBCSString(PyObject *unicode)
{
    const char *no_errors = NULL;
    return PyUnicode_EncodeCodePage(CP_ACP, unicode, no_errors);
}
#undef NEED_RETRY
#endif
static int
charmap_decode_string(const char *s,
Py_ssize_t size,
PyObject *mapping,
const char *errors,
_PyUnicodeWriter *writer)
{ … }
static int
charmap_decode_mapping(const char *s,
Py_ssize_t size,
PyObject *mapping,
const char *errors,
_PyUnicodeWriter *writer)
{ … }
PyObject *
PyUnicode_DecodeCharmap(const char *s,
Py_ssize_t size,
PyObject *mapping,
const char *errors)
{ … }
struct encoding_map { … };
static PyObject *
EncodingMap_size_impl(struct encoding_map *self)
{ … }
static PyMethodDef encoding_map_methods[] = …;
static PyTypeObject EncodingMapType = …;
PyObject*
PyUnicode_BuildEncodingMap(PyObject* string)
{ … }
static int
encoding_map_lookup(Py_UCS4 c, PyObject *mapping)
{ … }
static PyObject *
charmapencode_lookup(Py_UCS4 c, PyObject *mapping, unsigned char *replace)
{ … }
static int
charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
{ … }
charmapencode_result;
static charmapencode_result
charmapencode_output(Py_UCS4 c, PyObject *mapping,
PyObject **outobj, Py_ssize_t *outpos)
{ … }
static int
charmap_encoding_error(
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
PyObject **exceptionObject,
_Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
PyObject **res, Py_ssize_t *respos)
{ … }
PyObject *
_PyUnicode_EncodeCharmap(PyObject *unicode,
PyObject *mapping,
const char *errors)
{ … }
PyObject *
PyUnicode_AsCharmapString(PyObject *unicode,
PyObject *mapping)
{ … }
static void
make_translate_exception(PyObject **exceptionObject,
PyObject *unicode,
Py_ssize_t startpos, Py_ssize_t endpos,
const char *reason)
{ … }
static PyObject *
unicode_translate_call_errorhandler(const char *errors,
PyObject **errorHandler,
const char *reason,
PyObject *unicode, PyObject **exceptionObject,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{ … }
static int
charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result, Py_UCS4 *replace)
{ … }
static int
charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
_PyUnicodeWriter *writer)
{ … }
static int
unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
Py_UCS1 *translate)
{ … }
static int
unicode_fast_translate(PyObject *input, PyObject *mapping,
_PyUnicodeWriter *writer, int ignore,
Py_ssize_t *input_pos)
{ … }
static PyObject *
_PyUnicode_TranslateCharmap(PyObject *input,
PyObject *mapping,
const char *errors)
{ … }
PyObject *
PyUnicode_Translate(PyObject *str,
PyObject *mapping,
const char *errors)
{ … }
PyObject *
_PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode)
{ … }
#define ADJUST_INDICES(start, end, len) …
static Py_ssize_t
any_find_slice(PyObject* s1, PyObject* s2,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{ … }
#include "stringlib/localeutil.h"
Py_ssize_t
_PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar)
{ … }
Py_ssize_t
PyUnicode_Count(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end)
{ … }
Py_ssize_t
PyUnicode_Find(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{ … }
Py_ssize_t
PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
Py_ssize_t start, Py_ssize_t end,
int direction)
{ … }
static int
tailmatch(PyObject *self,
PyObject *substring,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{ … }
Py_ssize_t
PyUnicode_Tailmatch(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{ … }
static PyObject *
ascii_upper_or_lower(PyObject *self, int lower)
{ … }
static Py_UCS4
handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i)
{ … }
static int
lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
Py_UCS4 c, Py_UCS4 *mapped)
{ … }
static Py_ssize_t
do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{ … }
static Py_ssize_t
do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { … }
static Py_ssize_t
do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
Py_UCS4 *maxchar, int lower)
{ … }
static Py_ssize_t
do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{ … }
static Py_ssize_t
do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{ … }
static Py_ssize_t
do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{ … }
static Py_ssize_t
do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{ … }
static PyObject *
case_operation(PyObject *self,
Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *))
{ … }
PyObject *
PyUnicode_Join(PyObject *separator, PyObject *seq)
{ … }
PyObject *
_PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seqlen)
{ … }
void
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char)
{ … }
Py_ssize_t
PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char)
{ … }
static PyObject *
pad(PyObject *self,
Py_ssize_t left,
Py_ssize_t right,
Py_UCS4 fill)
{ … }
PyObject *
PyUnicode_Splitlines(PyObject *string, int keepends)
{ … }
static PyObject *
split(PyObject *self,
PyObject *substring,
Py_ssize_t maxcount)
{ … }
static PyObject *
rsplit(PyObject *self,
PyObject *substring,
Py_ssize_t maxcount)
{ … }
static Py_ssize_t
anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset)
{ … }
static Py_ssize_t
anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen,
PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
{ … }
static void
replace_1char_inplace(PyObject *u, Py_ssize_t pos,
Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
{ … }
static PyObject *
replace(PyObject *self, PyObject *str1,
PyObject *str2, Py_ssize_t maxcount)
{ … }
static PyObject *
unicode_title_impl(PyObject *self)
{ … }
static PyObject *
unicode_capitalize_impl(PyObject *self)
{ … }
static PyObject *
unicode_casefold_impl(PyObject *self)
{ … }
static int
convert_uc(PyObject *obj, void *addr)
{ … }
static PyObject *
unicode_center_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
{ … }
static int
unicode_compare(PyObject *str1, PyObject *str2)
{ … }
static int
unicode_compare_eq(PyObject *str1, PyObject *str2)
{ … }
int
_PyUnicode_Equal(PyObject *str1, PyObject *str2)
{ … }
int
PyUnicode_Compare(PyObject *left, PyObject *right)
{ … }
int
PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
{ … }
int
PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
{ … }
int
PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size)
{ … }
int
_PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
{ … }
int
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
{ … }
PyObject *
PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
{ … }
int
_PyUnicode_EQ(PyObject *aa, PyObject *bb)
{ … }
int
PyUnicode_Contains(PyObject *str, PyObject *substr)
{ … }
PyObject *
PyUnicode_Concat(PyObject *left, PyObject *right)
{ … }
void
PyUnicode_Append(PyObject **p_left, PyObject *right)
{ … }
void
PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
{ … }
static Py_ssize_t
unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
Py_ssize_t end)
{ … }
static PyObject *
unicode_encode_impl(PyObject *self, const char *encoding, const char *errors)
{ … }
static PyObject *
unicode_expandtabs_impl(PyObject *self, int tabsize)
{ … }
static Py_ssize_t
unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
Py_ssize_t end)
{ … }
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{ … }
static Py_hash_t
unicode_hash(PyObject *self)
{ … }
static Py_ssize_t
unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
Py_ssize_t end)
{ … }
static PyObject *
unicode_isascii_impl(PyObject *self)
{ … }
static PyObject *
unicode_islower_impl(PyObject *self)
{ … }
static PyObject *
unicode_isupper_impl(PyObject *self)
{ … }
static PyObject *
unicode_istitle_impl(PyObject *self)
{ … }
static PyObject *
unicode_isspace_impl(PyObject *self)
{ … }
static PyObject *
unicode_isalpha_impl(PyObject *self)
{ … }
static PyObject *
unicode_isalnum_impl(PyObject *self)
{ … }
static PyObject *
unicode_isdecimal_impl(PyObject *self)
{ … }
static PyObject *
unicode_isdigit_impl(PyObject *self)
{ … }
static PyObject *
unicode_isnumeric_impl(PyObject *self)
{ … }
Py_ssize_t
_PyUnicode_ScanIdentifier(PyObject *self)
{ … }
int
PyUnicode_IsIdentifier(PyObject *self)
{ … }
static PyObject *
unicode_isidentifier_impl(PyObject *self)
{ … }
static PyObject *
unicode_isprintable_impl(PyObject *self)
{ … }
static PyObject *
unicode_join(PyObject *self, PyObject *iterable)
{ … }
static Py_ssize_t
unicode_length(PyObject *self)
{ … }
static PyObject *
unicode_ljust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
{ … }
static PyObject *
unicode_lower_impl(PyObject *self)
{ … }
#define LEFTSTRIP …
#define RIGHTSTRIP …
#define BOTHSTRIP …
static const char *stripfuncnames[] = …;
#define STRIPNAME(i) …
PyObject *
_PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
{ … }
PyObject*
PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
{ … }
static PyObject *
do_strip(PyObject *self, int striptype)
{ … }
static PyObject *
do_argstrip(PyObject *self, int striptype, PyObject *sep)
{ … }
static PyObject *
unicode_strip_impl(PyObject *self, PyObject *chars)
{ … }
static PyObject *
unicode_lstrip_impl(PyObject *self, PyObject *chars)
{ … }
static PyObject *
unicode_rstrip_impl(PyObject *self, PyObject *chars)
{ … }
static PyObject*
unicode_repeat(PyObject *str, Py_ssize_t len)
{ … }
PyObject *
PyUnicode_Replace(PyObject *str,
PyObject *substr,
PyObject *replstr,
Py_ssize_t maxcount)
{ … }
static PyObject *
unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new,
Py_ssize_t count)
{ … }
static PyObject *
unicode_removeprefix_impl(PyObject *self, PyObject *prefix)
{ … }
static PyObject *
unicode_removesuffix_impl(PyObject *self, PyObject *suffix)
{ … }
static PyObject *
unicode_repr(PyObject *unicode)
{ … }
static Py_ssize_t
unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
Py_ssize_t end)
{ … }
static Py_ssize_t
unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
Py_ssize_t end)
{ … }
static PyObject *
unicode_rjust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
{ … }
PyObject *
PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{ … }
static PyObject *
unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
{ … }
PyObject *
PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
{ … }
PyObject *
PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
{ … }
static PyObject *
unicode_partition(PyObject *self, PyObject *sep)
{ … }
static PyObject *
unicode_rpartition(PyObject *self, PyObject *sep)
{ … }
PyObject *
PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{ … }
static PyObject *
unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
{ … }
static PyObject *
unicode_splitlines_impl(PyObject *self, int keepends)
{ … }
static
PyObject *unicode_str(PyObject *self)
{ … }
static PyObject *
unicode_swapcase_impl(PyObject *self)
{ … }
static PyObject *
unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
{ … }
static PyObject *
unicode_translate(PyObject *self, PyObject *table)
{ … }
static PyObject *
unicode_upper_impl(PyObject *self)
{ … }
static PyObject *
unicode_zfill_impl(PyObject *self, Py_ssize_t width)
{ … }
static PyObject *
unicode_startswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
Py_ssize_t end)
{ … }
static PyObject *
unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
Py_ssize_t end)
{ … }
static inline void
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
{ … }
void
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
{ … }
PyUnicodeWriter*
PyUnicodeWriter_Create(Py_ssize_t length)
{ … }
void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{ … }
static inline void
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
{ … }
int
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar)
{ … }
int
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind)
{ … }
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
{ … }
int
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
{ … }
int
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
{ … }
int
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
{ … }
int
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
{ … }
int
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
{ … }
int
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t start, Py_ssize_t end)
{ … }
int
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t start, Py_ssize_t end)
{ … }
int
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *ascii, Py_ssize_t len)
{ … }
int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
const char *str,
Py_ssize_t size)
{ … }
int
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
const char *string,
Py_ssize_t length,
const char *errors,
Py_ssize_t *consumed)
{ … }
int
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, Py_ssize_t len)
{ … }
PyObject *
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
{ … }
PyObject*
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{ … }
void
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
{ … }
#include "stringlib/unicode_format.h"
/* Docstring text for format(): a signature line, a "--" separator line, a
   blank line, then the description.
   NOTE(review): presumably referenced from the unicode_methods table, which
   is not visible here -- confirm. */
PyDoc_STRVAR(format__doc__,
"format($self, /, *args, **kwargs)\n\
--\n\
\n\
Return a formatted version of the string, using substitutions from args and kwargs.\n\
The substitutions are identified by braces ('{' and '}').");
/* Docstring text for format_map(): a signature line, a "--" separator line,
   a blank line, then the description.
   NOTE(review): presumably referenced from the unicode_methods table, which
   is not visible here -- confirm. */
PyDoc_STRVAR(format_map__doc__,
"format_map($self, mapping, /)\n\
--\n\
\n\
Return a formatted version of the string, using substitutions from mapping.\n\
The substitutions are identified by braces ('{' and '}').");
static PyObject *
unicode___format___impl(PyObject *self, PyObject *format_spec)
{ … }
static PyObject *
unicode_sizeof_impl(PyObject *self)
{ … }
static PyObject *
unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
{ … }
static PyMethodDef unicode_methods[] = …;
static PyObject *
unicode_mod(PyObject *v, PyObject *w)
{ … }
static PyNumberMethods unicode_as_number = …;
static PySequenceMethods unicode_as_sequence = …;
static PyObject*
unicode_subscript(PyObject* self, PyObject* item)
{ … }
static PyMappingMethods unicode_as_mapping = …;
struct unicode_formatter_t { … };
struct unicode_format_arg_t { … };
static PyObject *
unicode_format_getnextarg(struct unicode_formatter_t *ctx)
{ … }
static int
formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
PyObject **p_output,
_PyUnicodeWriter *writer)
{ … }
PyObject *
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
{ … }
static int
mainformatlong(PyObject *v,
struct unicode_format_arg_t *arg,
PyObject **p_output,
_PyUnicodeWriter *writer)
{ … }
static Py_UCS4
formatchar(PyObject *v)
{ … }
static int
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
struct unicode_format_arg_t *arg)
{ … }
static int
unicode_format_arg_format(struct unicode_formatter_t *ctx,
struct unicode_format_arg_t *arg,
PyObject **p_str)
{ … }
static int
unicode_format_arg_output(struct unicode_formatter_t *ctx,
struct unicode_format_arg_t *arg,
PyObject *str)
{ … }
static int
unicode_format_arg(struct unicode_formatter_t *ctx)
{ … }
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{ … }
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *unicode);
static PyObject *
unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
const char *errors)
{ … }
static const char *
arg_as_utf8(PyObject *obj, const char *name)
{ … }
static PyObject *
unicode_vectorcall(PyObject *type, PyObject *const *args,
size_t nargsf, PyObject *kwnames)
{ … }
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
{ … }
void
_PyUnicode_ExactDealloc(PyObject *op)
{ … }
/* Docstring text describing the two call forms of str(): conversion of an
   arbitrary object, and decoding of a bytes-like object with an optional
   encoding and error handler.
   NOTE(review): presumably the doc of PyUnicode_Type, whose initializer is
   not visible here -- confirm. */
PyDoc_STRVAR(unicode_doc,
"str(object='') -> str\n\
str(bytes_or_buffer[, encoding[, errors]]) -> str\n\
\n\
Create a new string object from the given object. If encoding or\n\
errors is specified, then the object must expose a data buffer\n\
that will be decoded using the given encoding and error handler.\n\
Otherwise, returns the result of object.__str__() (if defined)\n\
or repr(object).\n\
encoding defaults to 'utf-8'.\n\
errors defaults to 'strict'.");
static PyObject *unicode_iter(PyObject *seq);
PyTypeObject PyUnicode_Type = …;
static void
_init_global_state(void)
{ … }
void
_PyUnicode_InitState(PyInterpreterState *interp)
{ … }
PyStatus
_PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
{ … }
PyStatus
_PyUnicode_InitTypes(PyInterpreterState *interp)
{ … }
static PyObject*
intern_static(PyInterpreterState *interp, PyObject *s )
{ … }
void
_PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
{ … }
static void
immortalize_interned(PyObject *s)
{ … }
static PyObject*
intern_common(PyInterpreterState *interp, PyObject *s ,
bool immortalize)
{ … }
void
_PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **p)
{ … }
void
_PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **p)
{ … }
void
_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
{ … }
void
PyUnicode_InternInPlace(PyObject **p)
{ … }
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
void
PyUnicode_InternImmortal(PyObject **p)
{ … }
PyObject *
PyUnicode_InternFromString(const char *cp)
{ … }
void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
{ … }
unicodeiterobject;
static void
unicodeiter_dealloc(unicodeiterobject *it)
{ … }
static int
unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg)
{ … }
static PyObject *
unicodeiter_next(unicodeiterobject *it)
{ … }
static PyObject *
unicode_ascii_iter_next(unicodeiterobject *it)
{ … }
static PyObject *
unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{ … }
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
static PyObject *
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{ … }
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
static PyObject *
unicodeiter_setstate(unicodeiterobject *it, PyObject *state)
{ … }
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
static PyMethodDef unicodeiter_methods[] = …;
PyTypeObject PyUnicodeIter_Type = …;
PyTypeObject _PyUnicodeASCIIIter_Type = …;
static PyObject *
unicode_iter(PyObject *seq)
{ … }
static int
encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
{ … }
static int
config_get_codec_name(wchar_t **config_encoding)
{ … }
static PyStatus
init_stdio_encoding(PyInterpreterState *interp)
{ … }
static int
init_fs_codec(PyInterpreterState *interp)
{ … }
static PyStatus
init_fs_encoding(PyThreadState *tstate)
{ … }
PyStatus
_PyUnicode_InitEncodings(PyThreadState *tstate)
{ … }
static void
_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
{ … }
#ifdef MS_WINDOWS
/* Switch the current interpreter's configuration to the "mbcs" filesystem
   encoding with the "replace" error handler, then re-run init_fs_codec().

   Both replacement strings are duplicated before the configuration is
   touched, so an allocation failure leaves the existing settings intact.

   Returns the result of init_fs_codec(), or -1 with MemoryError set when a
   string could not be duplicated. */
int
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    PyConfig *config = (PyConfig *)_PyInterpreterState_GetConfig(interp);

    wchar_t *mbcs = _PyMem_RawWcsdup(L"mbcs");
    wchar_t *replace = _PyMem_RawWcsdup(L"replace");

    if (mbcs != NULL && replace != NULL) {
        /* Release the previous values and install the new ones. */
        PyMem_RawFree(config->filesystem_encoding);
        config->filesystem_encoding = mbcs;
        PyMem_RawFree(config->filesystem_errors);
        config->filesystem_errors = replace;

        return init_fs_codec(interp);
    }

    /* At least one duplication failed; drop whichever one succeeded. */
    PyMem_RawFree(mbcs);
    PyMem_RawFree(replace);
    PyErr_NoMemory();
    return -1;
}
#endif
#ifdef Py_DEBUG
/* Return 1 when get_interned_dict() yields NULL for the main interpreter,
   0 otherwise. */
static inline int
unicode_is_finalizing(void)
{
    if (get_interned_dict(_PyInterpreterState_Main()) != NULL) {
        return 0;
    }
    return 1;
}
#endif
void
_PyUnicode_FiniTypes(PyInterpreterState *interp)
{ … }
void
_PyUnicode_Fini(PyInterpreterState *interp)
{ … }
static PyMethodDef _string_methods[] = …;
static PyModuleDef_Slot module_slots[] = …;
static struct PyModuleDef _string_module = …;
PyMODINIT_FUNC
PyInit__string(void)
{ … }