pcre2_ucd.c | Explore in Territory

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2022 University of Cambridge

This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcd.py script and run it to generate
a new version of this code.

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This file contains tables of Unicode properties that are extracted from
Unicode data files. See the comments at the start of maint/GenerateUcd.py for
details.

As well as being part of the PCRE2 library, this file is #included by the
pcre2test program, which redefines the PRIV macro to change table names from
_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present,
just one of these tables is actually needed. When compiling the library, some
headers are needed. */

#ifndef PCRE2_PCRE2TEST
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */

/* The tables herein are needed only when UCP support is built, and in PCRE2
that happens automatically with UTF support. This module should not be
referenced otherwise, so it should not matter whether it is compiled or not.
However a comment was received about space saving - maybe the guy linked all
the modules rather than using a library - so we include a condition to cut out
the tables when not needed. But don't leave a totally empty module because some
compilers barf at that. Instead, just supply some small dummy tables. */

#ifndef SUPPORT_UNICODE
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}};
const uint16_t PRIV(ucd_stage1)[] = {0};
const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
#else

/* Total size: 112564 bytes, block size: 128. */

const char *PRIV(unicode_version) = …;

/* When recompiling tables with a new Unicode version, please check the types
in this structure definition with those in pcre2_internal.h (the actual field
names will be different).

typedef struct {
uint8_t property_0;
uint8_t property_1;
uint8_t property_2;
uint8_t property_3;
int32_t property_4;
uint16_t property_5;
uint16_t property_6;
} ucd_record;
*/

/* If the 32-bit library is run in non-32-bit mode, character values greater
than 0x10ffff may be encountered. For these we set up a special record. */

#if PCRE2_CODE_UNIT_WIDTH == 32
const ucd_record PRIV(dummy_ucd_record)[] = {{
  ucp_Unknown,    /* script */
  ucp_Cn,         /* type unassigned */
  ucp_gbOther,    /* grapheme break property */
  0,              /* case set */
  0,              /* other case */
  0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
  0,              /* bool properties offset */
  }};
#endif

/* This table contains lists of characters that are caseless sets of
more than one character. Each list is terminated by NOTACHAR. */

const uint32_t PRIV(ucd_caseless_sets)[] = …;

/* When #included in pcre2test, we don't need the table of digit sets, nor the
the large main UCD tables. */

#ifndef PCRE2_PCRE2TEST

/* This table lists the code points for the '9' characters in each set of
decimal digits. It is used to ensure that all the digits in a script run come
from the same set. */

const uint32_t PRIV(ucd_digit_sets)[] = …;

/* This vector is a list of script bitsets for the Script Extension property.
The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as
ucd_script_sets_item_size. */

const uint32_t PRIV(ucd_script_sets)[] = …;

/* This vector is a list of bitsets for Boolean properties. The number of
32_bit words in each bitset is #defined as ucd_boolprop_sets_item_size in
pcre2_ucp.h. */

const uint32_t PRIV(ucd_boolprop_sets)[] = …;

/* These are the main two-stage UCD tables. The fields in each record are:
script (8 bits), character type (8 bits), grapheme break property (8 bits),
offset to multichar other cases or zero (8 bits), offset to other case or zero
(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed
into a 16-bit field, and offset in binary properties table (16 bits). */

const ucd_record PRIV(ucd_records)[] = …;

const uint16_t PRIV(ucd_stage1)[] = …;

const uint16_t PRIV(ucd_stage2)[] = …#if UCD_BLOCK_SIZE != 128#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h#endif#endif  /* SUPPORT_UNICODE */#endif  /* PCRE2_PCRE2TEST */
godot/thirdparty/pcre2/src/pcre2_ucd.c