/* An implementation of Text I/O as defined by PEP 3116 - "New I/O" Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. Written by Amaury Forgeot d'Arc and Antoine Pitrou */ #include "Python.h" #include "pycore_call.h" // _PyObject_CallMethod() #include "pycore_codecs.h" // _PyCodecInfo_GetIncrementalDecoder() #include "pycore_fileutils.h" // _Py_GetLocaleEncoding() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "_iomodule.h" /*[clinic input] module _io class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type" class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type" class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/ nldecoder_object; textio; #define clinic_state … #include "clinic/textio.c.h" #undef clinic_state /* TextIOBase */ PyDoc_STRVAR(textiobase_doc, "Base class for text I/O.\n" "\n" "This class provides a character and line based interface to stream\n" "I/O. There is no readinto method because Python's character strings\n" "are immutable.\n" ); static PyObject * _unsupported(_PyIO_State *state, const char *message) { … } /*[clinic input] _io._TextIOBase.detach cls: defining_class / Separate the underlying buffer from the TextIOBase and return it. After the underlying buffer has been detached, the TextIO is in an unusable state. 
[clinic start generated code]*/ static PyObject * _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls) /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/ { … } /*[clinic input] _io._TextIOBase.read cls: defining_class size: int(unused=True) = -1 / Read at most size characters from stream. Read from underlying buffer until we have size characters or we hit EOF. If size is negative or omitted, read until EOF. [clinic start generated code]*/ static PyObject * _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls, int Py_UNUSED(size)) /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/ { … } /*[clinic input] _io._TextIOBase.readline cls: defining_class size: int(unused=True) = -1 / Read until newline or EOF. Return an empty string if EOF is hit immediately. If size is specified, at most size characters will be read. [clinic start generated code]*/ static PyObject * _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls, int Py_UNUSED(size)) /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/ { … } /*[clinic input] _io._TextIOBase.write cls: defining_class s: str(unused=True) / Write string s to stream. Return the number of characters written (which is always equal to the length of the string). [clinic start generated code]*/ static PyObject * _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls, const char *Py_UNUSED(s)) /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/ { … } /*[clinic input] @getter _io._TextIOBase.encoding Encoding of the text stream. Subclasses should override. [clinic start generated code]*/ static PyObject * _io__TextIOBase_encoding_get_impl(PyObject *self) /*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/ { … } /*[clinic input] @getter _io._TextIOBase.newlines Line endings translated so far. Only line endings translated during reading are considered. 
Subclasses should override. [clinic start generated code]*/ static PyObject * _io__TextIOBase_newlines_get_impl(PyObject *self) /*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/ { … } /*[clinic input] @getter _io._TextIOBase.errors The error setting of the decoder or encoder. Subclasses should override. [clinic start generated code]*/ static PyObject * _io__TextIOBase_errors_get_impl(PyObject *self) /*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/ { … } static PyMethodDef textiobase_methods[] = …; static PyGetSetDef textiobase_getset[] = …; static PyType_Slot textiobase_slots[] = …; /* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */ PyType_Spec textiobase_spec = …; /* IncrementalNewlineDecoder */ struct nldecoder_object { … }; /*[clinic input] _io.IncrementalNewlineDecoder.__init__ decoder: object translate: bool errors: object(c_default="NULL") = "strict" Codec used when reading a file in universal newlines mode. It wraps another incremental decoder, translating \r\n and \r into \n. It also records the types of newlines encountered. When used with translate=False, it ensures that the newline sequence is returned in one piece. When used with decoder=None, it expects unicode strings as decode input and translates newlines without first invoking an external decoder. 
[clinic start generated code]*/ static int _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, PyObject *decoder, int translate, PyObject *errors) /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/ { … } static int incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit, void *arg) { … } static int incrementalnewlinedecoder_clear(nldecoder_object *self) { … } static void incrementalnewlinedecoder_dealloc(nldecoder_object *self) { … } static int check_decoded(PyObject *decoded) { … } #define CHECK_INITIALIZED_DECODER(self) … #define SEEN_CR … #define SEEN_LF … #define SEEN_CRLF … #define SEEN_ALL … PyObject * _PyIncrementalNewlineDecoder_decode(PyObject *myself, PyObject *input, int final) { … } /*[clinic input] _io.IncrementalNewlineDecoder.decode input: object final: bool = False [clinic start generated code]*/ static PyObject * _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self, PyObject *input, int final) /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/ { … } /*[clinic input] _io.IncrementalNewlineDecoder.getstate [clinic start generated code]*/ static PyObject * _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/ { … } /*[clinic input] _io.IncrementalNewlineDecoder.setstate state: object / [clinic start generated code]*/ static PyObject * _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self, PyObject *state) /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/ { … } /*[clinic input] _io.IncrementalNewlineDecoder.reset [clinic start generated code]*/ static PyObject * _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ { … } static PyObject * incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) { … } /* 
TextIOWrapper */ encodefunc_t; struct textio { … }; static void textiowrapper_set_decoded_chars(textio *self, PyObject *chars); /* A couple of specialized cases in order to bypass the slow incremental encoding methods for the most popular encodings. */ static PyObject * ascii_encode(textio *self, PyObject *text) { … } static PyObject * utf16be_encode(textio *self, PyObject *text) { … } static PyObject * utf16le_encode(textio *self, PyObject *text) { … } static PyObject * utf16_encode(textio *self, PyObject *text) { … } static PyObject * utf32be_encode(textio *self, PyObject *text) { … } static PyObject * utf32le_encode(textio *self, PyObject *text) { … } static PyObject * utf32_encode(textio *self, PyObject *text) { … } static PyObject * utf8_encode(textio *self, PyObject *text) { … } static PyObject * latin1_encode(textio *self, PyObject *text) { … } // Return true when encoding can be skipped when text is ascii. static inline int is_asciicompat_encoding(encodefunc_t f) { … } /* Map normalized encoding names onto the specialized encoding funcs */ encodefuncentry; static const encodefuncentry encodefuncs[] = …; static int validate_newline(const char *newline) { … } static int set_newline(textio *self, const char *newline) { … } static int _textiowrapper_set_decoder(textio *self, PyObject *codec_info, const char *errors) { … } static PyObject* _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes, int eof) { … } static int _textiowrapper_set_encoder(textio *self, PyObject *codec_info, const char *errors) { … } static int _textiowrapper_fix_encoder_state(textio *self) { … } static int io_check_errors(PyObject *errors) { … } /*[clinic input] _io.TextIOWrapper.__init__ buffer: object encoding: str(accept={str, NoneType}) = None errors: object = None newline: str(accept={str, NoneType}) = None line_buffering: bool = False write_through: bool = False Character and line based layer over a BufferedIOBase object, buffer. 
encoding gives the name of the encoding that the stream will be decoded or encoded with. It defaults to locale.getencoding(). errors determines the strictness of encoding and decoding (see help(codecs.Codec) or the documentation for codecs.register) and defaults to "strict". newline controls how line endings are handled. It can be None, '', '\n', '\r', and '\r\n'. It works as follows: * On input, if newline is None, universal newlines mode is enabled. Lines in the input can end in '\n', '\r', or '\r\n', and these are translated into '\n' before being returned to the caller. If it is '', universal newline mode is enabled, but line endings are returned to the caller untranslated. If it has any of the other legal values, input lines are only terminated by the given string, and the line ending is returned to the caller untranslated. * On output, if newline is None, any '\n' characters written are translated to the system default line separator, os.linesep. If newline is '' or '\n', no translation takes place. If newline is any of the other legal values, any '\n' characters written are translated to the given string. If line_buffering is True, a call to flush is implied when a call to write contains a newline character. [clinic start generated code]*/ static int _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, const char *encoding, PyObject *errors, const char *newline, int line_buffering, int write_through) /*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/ { … } /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true, * -1 on error. 
*/ static int convert_optional_bool(PyObject *obj, int default_value) { … } static int textiowrapper_change_encoding(textio *self, PyObject *encoding, PyObject *errors, int newline_changed) { … } /*[clinic input] @critical_section _io.TextIOWrapper.reconfigure * encoding: object = None errors: object = None newline as newline_obj: object(c_default="NULL") = None line_buffering as line_buffering_obj: object = None write_through as write_through_obj: object = None Reconfigure the text stream with new parameters. This also does an implicit stream flush. [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, PyObject *errors, PyObject *newline_obj, PyObject *line_buffering_obj, PyObject *write_through_obj) /*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/ { … } static int textiowrapper_clear(textio *self) { … } static void textiowrapper_dealloc(textio *self) { … } static int textiowrapper_traverse(textio *self, visitproc visit, void *arg) { … } static PyObject * _io_TextIOWrapper_closed_get_impl(textio *self); /* This macro takes some shortcuts to make the common case faster. */ #define CHECK_CLOSED(self) … #define CHECK_INITIALIZED(self) … #define CHECK_ATTACHED(self) … #define CHECK_ATTACHED_INT(self) … /*[clinic input] @critical_section _io.TextIOWrapper.detach [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_detach_impl(textio *self) /*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/ { … } /* Flush the internal write buffer. This doesn't explicitly flush the underlying buffered object, though. 
*/ static int _textiowrapper_writeflush(textio *self) { … } /*[clinic input] @critical_section _io.TextIOWrapper.write text: unicode / [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_write_impl(textio *self, PyObject *text) /*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/ { … } /* Steal a reference to chars and store it in the decoded_chars buffer. */ static void textiowrapper_set_decoded_chars(textio *self, PyObject *chars) { … } static PyObject * textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) { … } /* Read and decode the next chunk of data from the BufferedReader. */ static int textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) { … } /*[clinic input] @critical_section _io.TextIOWrapper.read size as n: Py_ssize_t(accept={int, NoneType}) = -1 / [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) /*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/ { … } /* NOTE: `end` must point to the real end of the Py_UCS4 storage, that is to the NUL character. Otherwise the function will produce incorrect results. */ static const char * find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch) { … } Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, int kind, const char *start, const char *end, Py_ssize_t *consumed) { … } static PyObject * _textiowrapper_readline(textio *self, Py_ssize_t limit) { … } /*[clinic input] @critical_section _io.TextIOWrapper.readline size: Py_ssize_t = -1 / [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size) /*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/ { … } /* Seek and Tell */ cookie_type; /* To speed up cookie packing/unpacking, we store the fields in a temporary string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). 
The following macros define at which offsets in the intermediary byte string the various CookieStruct fields will be stored. */ #define COOKIE_BUF_LEN … #if PY_BIG_ENDIAN /* We want the least significant byte of start_pos to also be the least significant byte of the cookie, which means that in big-endian mode we must copy the fields in reverse order. */ #define OFF_START_POS … #define OFF_DEC_FLAGS … #define OFF_BYTES_TO_FEED … #define OFF_CHARS_TO_SKIP … #define OFF_NEED_EOF … #else /* Little-endian mode: the least significant byte of start_pos will naturally end up the least significant byte of the cookie. */ #define OFF_START_POS … #define OFF_DEC_FLAGS … #define OFF_BYTES_TO_FEED … #define OFF_CHARS_TO_SKIP … #define OFF_NEED_EOF … #endif static int textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) { … } static PyObject * textiowrapper_build_cookie(cookie_type *cookie) { … } static int _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) { … } static int _textiowrapper_encoder_reset(textio *self, int start_of_stream) { … } static int _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) { … } /*[clinic input] @critical_section _io.TextIOWrapper.seek cookie as cookieObj: object Zero or an opaque number returned by tell(). whence: int(c_default='0') = os.SEEK_SET The relative position to seek from. / Set the stream position, and return the new stream position. Four operations are supported, given by the following argument combinations: - seek(0, SEEK_SET): Rewind to the start of the stream. - seek(cookie, SEEK_SET): Restore a previous position; 'cookie' must be a number returned by tell(). - seek(0, SEEK_END): Fast-forward to the end of the stream. - seek(0, SEEK_CUR): Leave the current stream position unchanged. Any other argument combinations are invalid, and may raise exceptions. 
[clinic start generated code]*/ static PyObject * _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) /*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.tell Return the stream position as an opaque number. The return value of tell() can be given as input to seek(), to restore a previous stream position. [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_tell_impl(textio *self) /*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.truncate pos: object = None / [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) /*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/ { … } static PyObject * textiowrapper_repr(textio *self) { … } /* Inquiries */ /*[clinic input] @critical_section _io.TextIOWrapper.fileno [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_fileno_impl(textio *self) /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.seekable [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_seekable_impl(textio *self) /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.readable [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_readable_impl(textio *self) /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.writable [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_writable_impl(textio *self) /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.isatty [clinic start 
generated code]*/ static PyObject * _io_TextIOWrapper_isatty_impl(textio *self) /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.flush [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_flush_impl(textio *self) /*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/ { … } /*[clinic input] @critical_section _io.TextIOWrapper.close [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_close_impl(textio *self) /*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/ { … } static PyObject * textiowrapper_iternext(textio *self) { … } /*[clinic input] @critical_section @getter _io.TextIOWrapper.name [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_name_get_impl(textio *self) /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/ { … } /*[clinic input] @critical_section @getter _io.TextIOWrapper.closed [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_closed_get_impl(textio *self) /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/ { … } /*[clinic input] @critical_section @getter _io.TextIOWrapper.newlines [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_newlines_get_impl(textio *self) /*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/ { … } /*[clinic input] @critical_section @getter _io.TextIOWrapper.errors [clinic start generated code]*/ static PyObject * _io_TextIOWrapper_errors_get_impl(textio *self) /*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/ { … } /*[clinic input] @critical_section @getter _io.TextIOWrapper._CHUNK_SIZE [clinic start generated code]*/ static PyObject * _io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self) /*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/ { … } /*[clinic input] 
@critical_section @setter _io.TextIOWrapper._CHUNK_SIZE [clinic start generated code]*/ static int _io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value) /*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/ { … } static PyMethodDef incrementalnewlinedecoder_methods[] = …; static PyGetSetDef incrementalnewlinedecoder_getset[] = …; static PyType_Slot nldecoder_slots[] = …; PyType_Spec nldecoder_spec = …; static PyMethodDef textiowrapper_methods[] = …; static PyMemberDef textiowrapper_members[] = …; static PyGetSetDef textiowrapper_getset[] = …; PyType_Slot textiowrapper_slots[] = …; PyType_Spec textiowrapper_spec = …;