/* cpython/Parser/tokenizer/helpers.c */

#include "Python.h"
#include "errcode.h"
#include "pycore_token.h"

#include "../lexer/state.h"


/* ############## ERRORS ############## */

/* Shared varargs core for the two public syntaxerror entry points below:
   formats FORMAT/VARGS into a syntax error on TOK, presumably attached to
   the column span [col_offset, end_col_offset] -- TODO confirm against
   callers.  NOTE(review): body is empty in this view of the file. */
static int
_syntaxerror_range(struct tok_state *tok, const char *format,
                   int col_offset, int end_col_offset,
                   va_list vargs)
{}

/* Report a printf-style syntax error for TOK's current position.
   NOTE(review): body is empty in this view of the file. */
int
_PyTokenizer_syntaxerror(struct tok_state *tok, const char *format, ...)
{}

/* Like _PyTokenizer_syntaxerror(), but with an explicit column range
   (col_offset .. end_col_offset) supplied by the caller.
   NOTE(review): body is empty in this view of the file. */
int
_PyTokenizer_syntaxerror_known_range(struct tok_state *tok,
                        int col_offset, int end_col_offset,
                        const char *format, ...)
{}

/* Report an indentation-related error on TOK (name suggests inconsistent
   tabs/spaces -- TODO confirm).  NOTE(review): body is empty in this
   view of the file. */
int
_PyTokenizer_indenterror(struct tok_state *tok)
{}

/* Common error-return path for the tokenizer helpers; the char* result is
   presumably a sentinel handed back to the caller -- verify against the
   call sites.  NOTE(review): body is empty in this view of the file. */
char *
_PyTokenizer_error_ret(struct tok_state *tok) /* XXX */
{}

/* Emit a warning for an invalid string escape sequence whose offending
   character is FIRST_INVALID_ESCAPE_CHAR.  NOTE(review): body is empty
   in this view of the file. */
int
_PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_invalid_escape_char)
{}

/* Issue a printf-style warning of warning class CATEGORY during
   tokenization.  NOTE(review): body is empty in this view of the file. */
int
_PyTokenizer_parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...)
{}


/* ############## STRING MANIPULATION ############## */

/* Return a copy of the LEN bytes at S owned by/associated with TOK --
   ownership and failure behavior inferred from the name; confirm at the
   call sites.  NOTE(review): body is empty in this view of the file. */
char *
_PyTokenizer_new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
{}

/* Decode STR from encoding ENC and re-encode it as UTF-8, returning a
   new PyObject (presumably a bytes object -- TODO confirm).
   NOTE(review): body is empty in this view of the file. */
PyObject *
_PyTokenizer_translate_into_utf8(const char* str, const char* enc) {}

/* Normalize line endings in S; EXEC_INPUT and PRESERVE_CRLF tune the
   translation (exact semantics not visible here -- verify in the full
   implementation).  NOTE(review): body is empty in this view of the file. */
char *
_PyTokenizer_translate_newlines(const char *s, int exec_input, int preserve_crlf,
                   struct tok_state *tok) {}

/* ############## ENCODING STUFF ############## */


/* See whether the file starts with a BOM. If it does,
   invoke the set_readline function with the new encoding.
   Return 1 on success, 0 on failure.
   get_char/unget_char abstract the input stream so this works for both
   file- and string-based tokenizers.
   NOTE(review): body is empty in this view of the file. */
int
_PyTokenizer_check_bom(int get_char(struct tok_state *),
          void unget_char(int, struct tok_state *),
          int set_readline(struct tok_state *, const char *),
          struct tok_state *tok)
{}

/* Map encoding-name aliases to a canonical spelling, for utf-8 and
   latin-1 (per the original trailing comment).  NOTE(review): body is
   empty in this view of the file. */
static const char *
get_normal_name(const char *s)  /* for utf-8 and latin-1 */
{}

/* Return the coding spec in S, or NULL if none is found.  The result is
   written through *SPEC; SIZE bounds the scan -- confirm error signaling
   (int return) against the callers.
   NOTE(review): body is empty in this view of the file. */
static int
get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
{}

/* Check whether the line contains a coding spec. If it does,
   invoke the set_readline function for the new encoding.
   This function receives the tok_state and the new encoding.
   Return 1 on success, 0 on failure.
   (Coding specs are the "# -*- coding: ... -*-" style declarations of
   PEP 263 -- presumably parsed via get_coding_spec() above; confirm.)
   NOTE(review): body is empty in this view of the file. */
int
_PyTokenizer_check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
                  int set_readline(struct tok_state *, const char *))
{}

/* Check whether the characters at s start a valid
   UTF-8 sequence. Return the number of characters forming
   the sequence if yes, 0 if not.  The special cases match
   those in stringlib/codecs.h:utf8_decode.
   NOTE(review): body is empty in this view of the file. */
static int
valid_utf8(const unsigned char* s)
{}

/* Verify that LINE is valid UTF-8 (presumably via valid_utf8() above),
   reporting an error through TOK otherwise -- confirm return convention
   at the call sites.  NOTE(review): body is empty in this view of the
   file. */
int
_PyTokenizer_ensure_utf8(char *line, struct tok_state *tok)
{}


/* ############## DEBUGGING STUFF ############## */

#ifdef Py_DEBUG
/* Write the SIZE bytes at S to F as a double-quoted, C-style escaped
   string: \n, \r, \t, \f, \', \" for the usual specials, the byte itself
   for printable ASCII (0x20..0x7f), and \xNN for everything else.
   A NULL pointer is printed as the bare word NULL. */
void
_PyTokenizer_print_escape(FILE *f, const char *s, Py_ssize_t size)
{
    if (s == NULL) {
        fputs("NULL", f);
        return;
    }
    fputc('"', f);
    for (Py_ssize_t i = 0; i < size; i++) {
        unsigned char ch = (unsigned char)s[i];
        const char *esc = NULL;
        switch (ch) {
            case '\n': esc = "\\n"; break;
            case '\r': esc = "\\r"; break;
            case '\t': esc = "\\t"; break;
            case '\f': esc = "\\f"; break;
            case '\'': esc = "\\'"; break;
            case '"':  esc = "\\\""; break;
            default:   break;
        }
        if (esc != NULL) {
            fputs(esc, f);
        }
        else if (0x20 <= ch && ch <= 0x7f) {
            fputc(ch, f);
        }
        else {
            fprintf(f, "\\x%02x", ch);
        }
    }
    fputc('"', f);
}

/* Print the name of token TYPE to stderr; for tokens that carry source
   text (NAME, NUMBER, STRING, OP) also print the lexeme spanning
   [start, end) in parentheses. */
void
_PyTokenizer_tok_dump(int type, char *start, char *end)
{
    fputs(_PyParser_TokenNames[type], stderr);
    switch (type) {
        case NAME:
        case NUMBER:
        case STRING:
        case OP:
            fprintf(stderr, "(%.*s)", (int)(end - start), start);
            break;
        default:
            break;
    }
}
#endif