/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2022 University of Cambridge

This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcd.py script and run it to generate
a new version of this code.

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This file contains tables of Unicode properties that are extracted from
Unicode data files. See the comments at the start of maint/GenerateUcd.py for
details.

As well as being part of the PCRE2 library, this file is #included by the
pcre2test program, which redefines the PRIV macro to change table names from
_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present,
just one of these tables is actually needed. When compiling the library, some
headers are needed. */

#ifndef PCRE2_PCRE2TEST
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */

/* The tables herein are needed only when UCP support is built, and in PCRE2
that happens automatically with UTF support. This module should not be
referenced otherwise, so it should not matter whether it is compiled or not.
However a comment was received about space saving - maybe the guy linked all
the modules rather than using a library - so we include a condition to cut out
the tables when not needed. But don't leave a totally empty module because some
compilers barf at that. Instead, just supply some small dummy tables. */

#ifndef SUPPORT_UNICODE

/* Placeholder definitions used when Unicode support is not compiled in: each
table has a single element, and the one ucd_record has all seven of its fields
set to zero. */

const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}};
const uint16_t PRIV(ucd_stage1)[] = {0};
const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};

#else /* SUPPORT_UNICODE is defined: supply the real tables */

/* Total size: 112564 bytes, block size: 128. */

const char *PRIV(unicode_version) = …;

/* When recompiling tables with a new Unicode version, please check the types
in this structure definition with those in pcre2_internal.h (the actual field
names will be different).

typedef struct {
  uint8_t property_0;
  uint8_t property_1;
  uint8_t property_2;
  uint8_t property_3;
  int32_t property_4;
  uint16_t property_5;
  uint16_t property_6;
} ucd_record;
*/

/* If the 32-bit library is run in non-32-bit mode, character values greater
than 0x10ffff may be encountered. For these we set up a special record. */

#if PCRE2_CODE_UNIT_WIDTH == 32
const ucd_record PRIV(dummy_ucd_record)[] = {{
  ucp_Unknown,    /* script */
  ucp_Cn,         /* type unassigned */
  ucp_gbOther,    /* grapheme break property */
  0,              /* case set */
  0,              /* other case */
  0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
  0,              /* bool properties offset */
  }};
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* This table contains lists of characters that are caseless sets of
more than one character. Each list is terminated by NOTACHAR. */

const uint32_t PRIV(ucd_caseless_sets)[] = …;

/* When #included in pcre2test, everything from here to the end of the Unicode
tables is omitted: the table of digit sets, the script extension and Boolean
property bitsets, and the large main UCD tables. */

#ifndef PCRE2_PCRE2TEST

/* This table lists the code points for the '9' characters in each set of
decimal digits. It is used to ensure that all the digits in a script run come
from the same set. */

const uint32_t PRIV(ucd_digit_sets)[] = …;

/* This vector is a list of script bitsets for the Script Extension property.
The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as
ucd_script_sets_item_size. */

const uint32_t PRIV(ucd_script_sets)[] = …;

/* This vector is a list of bitsets for Boolean properties. The number of
32-bit words in each bitset is #defined as ucd_boolprop_sets_item_size in
pcre2_ucp.h. */

const uint32_t PRIV(ucd_boolprop_sets)[] = …;

/* These are the main two-stage UCD tables. The fields in each record are:
script (8 bits), character type (8 bits), grapheme break property (8 bits),
offset to multichar other cases or zero (8 bits), offset to other case or zero
(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed
into a 16-bit field, and offset in binary properties table (16 bits). */

const ucd_record PRIV(ucd_records)[] = …;

const uint16_t PRIV(ucd_stage1)[] = …;

const uint16_t PRIV(ucd_stage2)[] = …;

/* The tables above were generated with a block size of 128 (see the "Total
size" comment earlier in this file); refuse to build if the UCD_BLOCK_SIZE
setting seen by this file does not agree. */

#if UCD_BLOCK_SIZE != 128
#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h
#endif
#endif /* PCRE2_PCRE2TEST (closes the inner #ifndef above) */

#endif /* SUPPORT_UNICODE (closes the #ifndef ... #else above) */

/* End of pcre2_ucd.c */