godot/thirdparty/pcre2/src/pcre2_match.c

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2015-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pcre2_internal.h"

/* These defines enable debugging code */

/* #define DEBUG_FRAMES_DISPLAY */
/* #define DEBUG_SHOW_OPS */
/* #define DEBUG_SHOW_RMATCH */

#ifdef DEBUG_FRAMES_DISPLAY
#include <stdarg.h>
#endif

#ifdef DEBUG_SHOW_OPS
static const char *OP_names[] = { OP_NAME_LIST };
#endif

/* These defines identify the name of the block containing "static"
information, and fields within it. */

#define NLBLOCK
#define PSSTART
#define PSEND

#define RECURSE_UNSET

/* Masks for identifying the public options that are permitted at match time. */

#define PUBLIC_MATCH_OPTIONS

#define PUBLIC_JIT_MATCH_OPTIONS

/* Non-error returns from and within the match() function. Error returns are
externally defined PCRE2_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH
#define MATCH_NOMATCH

/* Special internal returns used in the match() function. Make them
sufficiently negative to avoid the external error codes. */

#define MATCH_ACCEPT
#define MATCH_KETRPOS
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. */
#define MATCH_COMMIT
#define MATCH_PRUNE
#define MATCH_SKIP
#define MATCH_SKIP_ARG
#define MATCH_THEN
#define MATCH_BACKTRACK_MAX
#define MATCH_BACKTRACK_MIN

/* Group frame type values. Zero means the frame is not a group frame. The
lower 16 bits are used for data (e.g. the capture number). Group frames are
used for most groups so that information about the start is easily available at
the end without having to scan back through intermediate frames (backtrack
points). */

#define GF_CAPTURE
#define GF_NOCAPTURE
#define GF_CONDASSERT
#define GF_RECURSE

/* Masks for the identity and data parts of the group frame type. */

#define GF_IDMASK(a)
#define GF_DATAMASK(a)

/* Repetition types */

enum {};

/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
infinity. */

static const uint32_t rep_min[] =;  /* OP_CRPOS{STAR, PLUS, QUERY} */

static const uint32_t rep_max[] =; /* OP_CRPOS{STAR, PLUS, QUERY} */

/* Repetition types - must include OP_CRPOSRANGE (not needed above) */

static const uint32_t rep_typ[] =;  /* OP_CRPOSQUERY, OP_CRPOSRANGE */

/* Numbers for RMATCH calls at backtracking points. When these lists are
changed, the code at RETURN_SWITCH below must be updated in sync.  */

enum {};

#ifdef SUPPORT_WIDE_CHARS
enum {};
#endif

#ifdef SUPPORT_UNICODE
enum {};
#endif

/* Define short names for general fields in the current backtrack frame, which
is always pointed to by the F variable. Occasional references to fields in
other frames are written out explicitly. There are also some fields in the
current frame whose names start with "temp" that are used for short-term,
localised backtracking memory. These are #defined with Lxxx names at the point
of use and undefined afterwards. */

#define Fback_frame
#define Fcapture_last
#define Fcurrent_recurse
#define Fecode
#define Feptr
#define Fgroup_frame_type
#define Flast_group_offset
#define Flength
#define Fmark
#define Frdepth
#define Fstart_match
#define Foffset_top
#define Foccu
#define Fop
#define Fovector
#define Freturn_id


#ifdef DEBUG_FRAMES_DISPLAY
/*************************************************
*      Display current frames and contents       *
*************************************************/

/* This debugging function displays the current set of frames and their
contents. It is not called automatically from anywhere, the intention being
that calls can be inserted where necessary when debugging frame-related
problems.

Arguments:
  f           the file to write to
  F           the current top frame
  P           a previous frame of interest
  frame_size  the frame size
  mb          points to the match block
  match_data  points to the match data block
  s           identification text

Returns:    nothing
*/

static void
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
{
uint32_t i;
heapframe *Q;
va_list ap;
va_start(ap, s);

fprintf(f, "FRAMES ");
vfprintf(f, s, ap);
va_end(ap);

if (P != NULL) fprintf(f, " P=%lu",
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
fprintf(f, "\n");

for (i = 0, Q = match_data->heapframes;
     Q <= F;
     i++, Q = (heapframe *)((char *)Q + frame_size))
  {
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
    Q->back_frame, Q->return_id);

  if (Q->last_group_offset == PCRE2_UNSET)
    fprintf(f, " lgoffset=unset\n");
  else
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
  }
}

#endif



/*************************************************
*                Process a callout               *
*************************************************/

/* This function is called for all callouts, whether "standalone" or at the
start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
with fixed values.

Arguments:
  F          points to the current backtracking frame
  mb         points to the match block
  lengthptr  where to return the length of the callout item

Returns:     the return from the callout
             or 0 if no callout function exists
*/

static int
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
{}



/*************************************************
*          Match a back-reference                *
*************************************************/

/* This function is called only when it is known that the offset lies within
the offsets that have so far been used in the match. Note that in caseless
UTF-8 mode, the number of subject bytes matched may be different to the number
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
seems unlikely.)

Arguments:
  offset      index into the offset vector
  caseless    TRUE if caseless
  F           the current backtracking frame pointer
  mb          points to match block
  lengthptr   pointer for returning the length matched

Returns:      = 0 sucessful match; number of code units matched is set
              < 0 no match
              > 0 partial match
*/

static int
match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
  PCRE2_SIZE *lengthptr)
{}



/******************************************************************************
*******************************************************************************
                   "Recursion" in the match() function

The original match() function was highly recursive, but this proved to be the
source of a number of problems over the years, mostly because of the relatively
small system stacks that are commonly found. As new features were added to
patterns, various kludges were invented to reduce the amount of stack used,
making the code hard to understand in places.

A version did exist that used individual frames on the heap instead of calling
match() recursively, but this ran substantially slower. The current version is
a refactoring that uses a vector of frames to remember backtracking points.
This runs no slower, and possibly even a bit faster than the original recursive
implementation.

At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
frames) was allocated on the system stack. If this was not big enough, the heap
was used for a larger vector. However, it turns out that there are environments
where taking as little as 20KiB from the system stack is an embarrassment.
After another refactoring, the heap is used exclusively, but a pointer the
frames vector and its size are cached in the match_data block, so that there is
no new memory allocation if the same match_data block is used for multiple
matches (unless the frames vector has to be extended).
*******************************************************************************
******************************************************************************/




/*************************************************
*       Macros for the match() function          *
*************************************************/

/* These macros pack up tests that are used for partial matching several times
in the code. The second one is used when we already know we are past the end of
the subject. We set the "hit end" flag if the pointer is at the end of the
subject and either (a) the pointer is past the earliest inspected character
(i.e. something has been matched, even if not part of the actual matched
string), or (b) the pattern contains a lookbehind. These are the conditions for
which adding more characters may allow the current match to continue.

For hard partial matching, we immediately return a partial match. Otherwise,
carrying on means that a complete match on the current subject will be sought.
A partial match is returned only if no complete match can be found. */

#define CHECK_PARTIAL()

#define SCHECK_PARTIAL()


/* These macros are used to implement backtracking. They simulate a recursive
call to the match() function by means of a local vector of frames which
remember the backtracking points. */

#define RMATCH(ra,rb)

#define RRETURN(ra)



/*************************************************
*         Match from current position            *
*************************************************/

/* This function is called to run one match attempt at a single starting point
in the subject.

Performance note: It might be tempting to extract commonly used fields from the
mb structure (e.g. end_subject) into individual variables to improve
performance. Tests using gcc on a SPARC disproved this; in the first case, it
made performance worse.

Arguments:
   start_eptr   starting character in subject
   start_ecode  starting position in compiled code
   top_bracket  number of capturing parentheses in the pattern
   frame_size   size of each backtracking frame
   match_data   pointer to the match_data block
   mb           pointer to "static" variables block

Returns:        MATCH_MATCH if matched            )  these values are >= 0
                MATCH_NOMATCH if failed to match  )
                negative MATCH_xxx value for PRUNE, SKIP, etc
                negative PCRE2_ERROR_xxx value if aborted by an error condition
                (e.g. stopped by repeated call or depth limit)
*/

static int
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
{}


/*************************************************
*           Match a Regular Expression           *
*************************************************/

/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.

Arguments:
  code            points to the compiled expression
  subject         points to the subject string
  length          length of subject string (may contain binary zeros)
  start_offset    where to start in the subject string
  options         option bits
  match_data      points to a match_data block
  mcontext        points a PCRE2 context

Returns:          > 0 => success; value is the number of ovector pairs filled
                  = 0 => success, but ovector is not big enough
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
                  < -2 => some kind of unexpected problem
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext)
{}

/* These #undefs are here to enable unity builds with CMake. */

#undef NLBLOCK /* Block containing newline information */
#undef PSSTART /* Field containing processed string start */
#undef PSEND   /* Field containing processed string end */

/* End of pcre2_match.c */