cpython/Parser/tokenizer/file_tokenizer.c

#include "Python.h"
#include "pycore_call.h"
#include "pycore_import.h"
#include "pycore_fileutils.h"
#include "errcode.h"

#ifdef HAVE_UNISTD_H
#  include <unistd.h>             // lseek(), read()
#endif

#include "helpers.h"
#include "../lexer/state.h"
#include "../lexer/lexer.h"
#include "../lexer/buffer.h"

static int
tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {}

static int
tok_readline_raw(struct tok_state *tok)
{}

static int
tok_readline_recode(struct tok_state *tok) {}

/* Fetch the next byte from TOK. */
static int fp_getc(struct tok_state *tok) {}

/* Unfetch the last byte back into TOK.  */
static void fp_ungetc(int c, struct tok_state *tok) {}

/* Set the readline function for TOK to a StreamReader's
   readline function. The StreamReader is named ENC.

   This function is called from _PyTokenizer_check_bom and _PyTokenizer_check_coding_spec.

   ENC is usually identical to the future value of tok->encoding,
   except for the (currently unsupported) case of UTF-16.

   Return 1 on success, 0 on failure. */
static int
fp_setreadl(struct tok_state *tok, const char* enc)
{}

static int
tok_underflow_interactive(struct tok_state *tok) {}

static int
tok_underflow_file(struct tok_state *tok) {}

/* Set up tokenizer for file */
struct tok_state *
_PyTokenizer_FromFile(FILE *fp, const char* enc,
                      const char *ps1, const char *ps2)
{}

#if defined(__wasi__) || (defined(__EMSCRIPTEN__) && (__EMSCRIPTEN_major__ >= 3))
// fdopen() with borrowed fd. WASI does not provide dup() and Emscripten's
// dup() emulation with open() is slow.
typedef union {
    void *cookie;
    int fd;
} borrowed;

static ssize_t
borrow_read(void *cookie, char *buf, size_t size)
{
    borrowed b = {.cookie = cookie};
    return read(b.fd, (void *)buf, size);
}

static FILE *
fdopen_borrow(int fd) {
    // supports only reading. seek fails. close and write are no-ops.
    cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL};
    borrowed b = {.fd = fd};
    return fopencookie(b.cookie, "r", io_cb);
}
#else
static FILE *
fdopen_borrow(int fd) {}
#endif

/* Get the encoding of a Python file. Check for the coding cookie and check if
   the file starts with a BOM.

   _PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
   encoding in the first or second line of the file (in which case the encoding
   should be assumed to be UTF-8).

   The char* returned is malloc'ed via PyMem_Malloc() and thus must be freed
   by the caller. */
char *
_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{}