unicode_format.h | Explore in Territory

/*
    unicode_format.h -- implementation of str.format().
*/

#include "pycore_complexobject.h" // _PyComplex_FormatAdvancedWriter()
#include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()

/************************************************************************/
/***********   Global data structures and forward declarations  *********/
/************************************************************************/

/*
   A SubString consists of the characters between two string or
   unicode pointers.
*/
SubString;


AutoNumberState;   /* Keep track if we're auto-numbering fields */

/* Keeps track of our auto-numbering state, and which number field we're on */
AutoNumber;


/* forward declaration for recursion */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);



/************************************************************************/
/**************************  Utility  functions  ************************/
/************************************************************************/

static void
AutoNumber_Init(AutoNumber *auto_number)
{ … }

/* fill in a SubString from a pointer and length */
Py_LOCAL_INLINE(void)
SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
{ … }

/* return a new string.  if str->str is NULL, return None */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString *str)
{ … }

/* return a new string.  if str->str is NULL, return a new empty string */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString *str)
{ … }

/* Return 1 if an error has been detected switching between automatic
   field numbering and manual field specification, else return 0. Set
   ValueError on error. */
static int
autonumber_state_error(AutoNumberState state, int field_name_is_empty)
{ … }


/************************************************************************/
/***********  Format string parsing -- integers and identifiers *********/
/************************************************************************/

static Py_ssize_t
get_integer(const SubString *str)
{ … }

/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/

/* do the equivalent of obj.name */
static PyObject *
getattr(PyObject *obj, SubString *name)
{ … }

/* do the equivalent of obj[idx], where obj is a sequence */
static PyObject *
getitem_sequence(PyObject *obj, Py_ssize_t idx)
{ … }

/* do the equivalent of obj[idx], where obj is not a sequence */
static PyObject *
getitem_idx(PyObject *obj, Py_ssize_t idx)
{ … }

/* do the equivalent of obj[name] */
static PyObject *
getitem_str(PyObject *obj, SubString *name)
{ … }

FieldNameIterator;


static int
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
                       Py_ssize_t start, Py_ssize_t end)
{ … }

static int
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
{ … }

static int
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
{ … }

/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
static int
FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
                       Py_ssize_t *name_idx, SubString *name)
{ … }


/* input: field_name
   output: 'first' points to the part before the first '[' or '.'
           'first_idx' is -1 if 'first' is not an integer, otherwise
                       it's the value of first converted to an integer
           'rest' is an iterator to return the rest
*/
static int
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
                 Py_ssize_t *first_idx, FieldNameIterator *rest,
                 AutoNumber *auto_number)
{ … }


/*
    get_field_object returns the object inside {}, before the
    format_spec.  It handles getindex and getattr lookups and consumes
    the entire input string.
*/
static PyObject *
get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
                 AutoNumber *auto_number)
{ … }

/************************************************************************/
/*****************  Field rendering functions  **************************/
/************************************************************************/

/*
    render_field() is the main function in this section.  It takes the
    field object and field specification string generated by
    get_field_and_spec, and renders the field into the output string.

    render_field calls fieldobj.__format__(format_spec) method, and
    appends to the output.
*/
static int
render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
{ … }

static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
            int *format_spec_needs_expanding, Py_UCS4 *conversion)
{ … }

/************************************************************************/
/******* Output string allocation and escape-to-markup processing  ******/
/************************************************************************/

/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

MarkupIterator;

static int
MarkupIterator_init(MarkupIterator *self, PyObject *str,
                    Py_ssize_t start, Py_ssize_t end)
{ … }

/* returns 0 on error, 1 on non-error termination, and 2 if it got a
   string (or something to be expanded) */
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    int *field_present, SubString *field_name,
                    SubString *format_spec, Py_UCS4 *conversion,
                    int *format_spec_needs_expanding)
{ … }


/* do the !r or !s conversion on obj */
static PyObject *
do_conversion(PyObject *obj, Py_UCS4 conversion)
{ … }

/* given:

   {field_name!conversion:format_spec}

   compute the result and write it to output.
   format_spec_needs_expanding is an optimization.  if it's false,
   just output the string directly, otherwise recursively expand the
   format_spec string.

   field_name is allowed to be zero length, in which case we
   are doing auto field numbering.
*/

static int
output_markup(SubString *field_name, SubString *format_spec,
              int format_spec_needs_expanding, Py_UCS4 conversion,
              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
              int recursion_depth, AutoNumber *auto_number)
{ … }

/*
    do_markup is the top-level loop for the format() method.  It
    searches through the format string for escapes to markup codes, and
    calls other functions to move non-markup text to the output,
    and to perform the markup to the output.
*/
static int
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
{ … }


/*
    build_string allocates the output string and then
    calls do_markup to do the heavy lifting.
*/
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number)
{ … }

/************************************************************************/
/*********** main routine ***********************************************/
/************************************************************************/

/* this is the main entry point */
static PyObject *
do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
{ … }

static PyObject *
do_string_format_map(PyObject *self, PyObject *obj)
{ … }


/************************************************************************/
/*********** formatteriterator ******************************************/
/************************************************************************/

/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

formatteriterobject;

static void
formatteriter_dealloc(formatteriterobject *it)
{ … }

/* returns a tuple:
   (literal, field_name, format_spec, conversion)

   literal is any literal text to output.  might be zero length
   field_name is the string before the ':'.  might be None
   format_spec is the string after the ':'.  mibht be None
   conversion is either None, or the string after the '!'
*/
static PyObject *
formatteriter_next(formatteriterobject *it)
{ … }

static PyMethodDef formatteriter_methods[] = …;

static PyTypeObject PyFormatterIter_Type = …;

/* unicode_formatter_parser is used to implement
   string.Formatter.vformat.  it parses a string and returns tuples
   describing the parsed elements.  It's a wrapper around
   stringlib/string_format.h's MarkupIterator */
static PyObject *
formatter_parser(PyObject *ignored, PyObject *self)
{ … }


/************************************************************************/
/*********** fieldnameiterator ******************************************/
/************************************************************************/


/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator */

fieldnameiterobject;

static void
fieldnameiter_dealloc(fieldnameiterobject *it)
{ … }

/* returns a tuple:
   (is_attr, value)
   is_attr is true if we used attribute syntax (e.g., '.foo')
              false if we used index syntax (e.g., '[foo]')
   value is an integer or string
*/
static PyObject *
fieldnameiter_next(fieldnameiterobject *it)
{ … }

static PyMethodDef fieldnameiter_methods[] = …;

static PyTypeObject PyFieldNameIter_Type = …;

/* unicode_formatter_field_name_split is used to implement
   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
   returns a tuple of (first, rest): "first", the part before the
   first '.' or '['; and "rest", an iterator for the rest of the field
   name.  it's a wrapper around stringlib/string_format.h's
   field_name_split.  The iterator it returns is a
   FieldNameIterator */
static PyObject *
formatter_field_name_split(PyObject *ignored, PyObject *self)
{ … }
cpython/Objects/stringlib/unicode_format.h