#if !(defined SUPPORT_VALGRIND)
#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
|| (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
vector_compare_type;
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{ … }
#else
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{ … }
#endif
#endif
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{ … }
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{ … }
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD …
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{ … }
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD …
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{ … }
#ifndef _WIN64
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD …
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{ … }
#endif
#undef SIMD_COMPARE_TYPE_INDEX
#endif
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
#include <arm_neon.h>
typedef union {
unsigned int x;
struct { unsigned char c1, c2, c3, c4; } c;
} int_char;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return (*s & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VECTOR_FACTOR …
#define vect_t …
#define VLD1Q …
#define VCEQQ …
#define VORRQ …
#define VST1Q …
#define VDUPQ …
#define VEXTQ …
#define VANDQ …
typedef union {
uint8_t mem[16];
uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define VECTOR_FACTOR …
#define vect_t …
#define VLD1Q …
#define VCEQQ …
#define VORRQ …
#define VST1Q …
#define VDUPQ …
#define VEXTQ …
#define VANDQ …
typedef union {
uint16_t mem[8];
uint64_t dw[2];
} quad_word;
#else
#define VECTOR_FACTOR …
#define vect_t …
#define VLD1Q …
#define VCEQQ …
#define VORRQ …
#define VST1Q …
#define VDUPQ …
#define VEXTQ …
#define VANDQ …
typedef union {
uint32_t mem[4];
uint64_t dw[2];
} quad_word;
#endif
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD …
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit, *quit;
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
if (char1 == char2)
{
ic.c.c1 = char1;
ic.c.c2 = char2;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
#endif
}
else
{
PCRE2_UCHAR mask = char1 ^ char2;
if (is_powerof2(mask))
{
ic.c.c1 = char1 | mask;
ic.c.c2 = mask;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
#endif
}
else
{
ic.c.c1 = char1;
ic.c.c2 = char2;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
#endif
}
}
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
{
quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
JUMPHERE(partial_quit);
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
JUMPHERE(quit);
}
}
typedef enum {
compare_match1,
compare_match1i,
compare_match2,
} compare_type;
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
if (ctype == compare_match2)
{
vect_t tmp = dst;
dst = VCEQQ(dst, cmp1);
tmp = VCEQQ(tmp, cmp2);
dst = VORRQ(dst, tmp);
return dst;
}
if (ctype == compare_match1i)
dst = VORRQ(dst, cmp2);
dst = VCEQQ(dst, cmp1);
return dst;
}
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
{
vect_t zero = VDUPQ(0);
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
#define C …
switch (n)
{
C(1); C(2); C(3);
#if PCRE2_CODE_UNIT_WIDTH != 32
C(4); C(5); C(6); C(7);
# if PCRE2_CODE_UNIT_WIDTH != 16
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
# endif
#endif
default:
return a;
}
}
#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A
#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_0
#undef FFCPS_CHAR1A2A
#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_1
#undef FFCPS_DIFF1
#define FFCPS_DEFAULT
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD …
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *partial_quit;
int_char ic;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
if (common->match_end_ptr == 0)
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
else
{
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
}
GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
ic.c.c1 = char1a;
ic.c.c2 = char1b;
ic.c.c3 = char2a;
ic.c.c4 = char2b;
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);
if (diff == 1) {
if (char1a == char1b && char2a == char2b) {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0));
} else {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1));
}
} else {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default));
}
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, partial_quit);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
JUMPHERE(partial_quit);
}
#endif
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VECTOR_ELEMENT_SIZE …
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define VECTOR_ELEMENT_SIZE …
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define VECTOR_ELEMENT_SIZE …
#else
#error "Unsupported unit width"
#endif
static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
sljit_s32 base_reg, sljit_s32 index_reg)
{
sljit_u16 instruction[3];
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
instruction[1] = (sljit_u16)(base_reg << 12);
instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06));
sljit_emit_op_custom(compiler, instruction, 6);
}
#if PCRE2_CODE_UNIT_WIDTH == 32
static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
{
sljit_u16 instruction[3];
SLJIT_ASSERT(step >= 0 && step <= 1);
if (chr < 0x7fff)
{
if (step == 1)
return;
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
instruction[1] = (sljit_u16)chr;
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
return;
}
if (step == 0)
{
OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
instruction[1] = 0;
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
sljit_emit_op_custom(compiler, instruction, 6);
return;
}
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
instruction[1] = 0;
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
sljit_emit_op_custom(compiler, instruction, 6);
}
#endif
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u16 instruction[3];
SLJIT_ASSERT(step >= 0 && step <= 2);
if (step == 1)
{
instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
instruction[1] = (sljit_u16)(cmp1_ind << 12);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
sljit_emit_op_custom(compiler, instruction, 6);
return;
}
if (compare_type != vector_compare_match2)
{
if (step == 0 && compare_type == vector_compare_match1i)
{
instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
instruction[1] = (sljit_u16)(cmp2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
sljit_emit_op_custom(compiler, instruction, 6);
}
return;
}
switch (step)
{
case 0:
instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
instruction[1] = (sljit_u16)(cmp2_ind << 12);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
sljit_emit_op_custom(compiler, instruction, 6);
return;
case 2:
instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
instruction[1] = (sljit_u16)(tmp_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
sljit_emit_op_custom(compiler, instruction, 6);
return;
}
}
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD …
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;
SLJIT_UNUSED_ARG(offset);
if (char1 != char2)
{
bit = char1 ^ char2;
compare_type = vector_compare_match1i;
if (!is_powerof2(bit))
{
bit = 0;
compare_type = vector_compare_match2;
}
}
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[0]);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
#if PCRE2_CODE_UNIT_WIDTH != 32
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
if (char1 != char2)
{
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
sljit_emit_op_custom(compiler, instruction, 6);
}
#else
for (int i = 0; i < 2; i++)
{
replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
if (char1 != char2)
replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
}
#endif
if (compare_type == vector_compare_match2)
{
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
instruction[1] = 0;
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
if (compare_type != vector_compare_match2)
{
if (compare_type == vector_compare_match1i)
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
sljit_emit_op_custom(compiler, instruction, 6);
}
else
{
for (i = 0; i < 3; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
}
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]);
load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
if (compare_type != vector_compare_match2)
{
if (compare_type == vector_compare_match1i)
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
sljit_emit_op_custom(compiler, instruction, 6);
}
else
{
for (i = 0; i < 3; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
}
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(quit);
if (common->mode != PCRE2_JIT_COMPLETE)
{
JUMPHERE(partial_quit[0]);
JUMPHERE(partial_quit[1]);
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
}
else
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
quit = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
JUMPTO(SLJIT_JUMP, restart);
JUMPHERE(quit);
}
#endif
}
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD …
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;
if (char1 != char2)
{
bit = char1 ^ char2;
compare_type = vector_compare_match1i;
if (!is_powerof2(bit))
{
bit = 0;
compare_type = vector_compare_match2;
}
}
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);
#if PCRE2_CODE_UNIT_WIDTH != 32
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
if (char1 != char2)
{
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
sljit_emit_op_custom(compiler, instruction, 6);
}
#else
for (int i = 0; i < 2; i++)
{
replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
if (char1 != char2)
replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
}
#endif
if (compare_type == vector_compare_match2)
{
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
instruction[1] = 0;
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
}
load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
if (compare_type != vector_compare_match2)
{
if (compare_type == vector_compare_match1i)
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
sljit_emit_op_custom(compiler, instruction, 6);
}
else
{
for (i = 0; i < 3; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
}
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);
start = LABEL();
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
if (compare_type != vector_compare_match2)
{
if (compare_type == vector_compare_match1i)
fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
sljit_emit_op_custom(compiler, instruction, 6);
}
else
{
for (i = 0; i < 3; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
}
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
JUMPHERE(quit);
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
return not_found;
}
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD …
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *jump[2];
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
sljit_s32 zero_ind = 8;
int i;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
if (char1a != char1b)
{
bit1 = char1a ^ char1b;
compare1_type = vector_compare_match1i;
if (!is_powerof2(bit1))
{
bit1 = 0;
compare1_type = vector_compare_match2;
}
}
if (char2a != char2b)
{
bit2 = char2a ^ char2b;
compare2_type = vector_compare_match1i;
if (!is_powerof2(bit2))
{
bit2 = 0;
compare2_type = vector_compare_match2;
}
}
if (common->match_end_ptr != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
}
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
#if PCRE2_CODE_UNIT_WIDTH != 32
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
instruction[1] = (sljit_u16)(char1a | bit1);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
if (char1a != char1b)
{
instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
sljit_emit_op_custom(compiler, instruction, 6);
}
instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
instruction[1] = (sljit_u16)(char2a | bit2);
sljit_emit_op_custom(compiler, instruction, 6);
if (char2a != char2b)
{
instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
sljit_emit_op_custom(compiler, instruction, 6);
}
#else
for (int i = 0; i < 2; i++)
{
replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
if (char1a != char1b)
replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
if (char2a != char2b)
replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
}
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
#endif
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
instruction[1] = 0;
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
jump[1] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
JUMPHERE(jump[1]);
load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
for (i = 0; i < 3; i++)
{
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
}
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
for (i = 0; i < 3; i++)
{
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
}
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);
instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(quit);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
quit = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
JUMPTO(SLJIT_JUMP, restart);
JUMPHERE(quit);
}
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
#endif
#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
#ifdef __linux__
#include <sys/auxv.h>
#define LOONGARCH_HWCAP_LSX …
#define HAS_LSX_SUPPORT …
#else
#define HAS_LSX_SUPPORT …
#endif
typedef sljit_ins sljit_u32;
#define SI12_IMM_MASK …
#define UI5_IMM_MASK …
#define UI2_IMM_MASK …
#define VD …
#define VJ …
#define VK …
#define RD_V …
#define RJ_V …
#define IMM_SI12 …
#define IMM_UI5 …
#define IMM_UI2 …
#define VLD …
#define VOR_V …
#define VAND_V …
#define VBSLL_V …
#define VMSKLTZ_B …
#define VPICKVE2GR_WU …
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VREPLGR2VR …
#define VSEQ …
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define VREPLGR2VR …
#define VSEQ …
#else
#define VREPLGR2VR …
#define VSEQ …
#endif
static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
if (compare_type != vector_compare_match2)
{
if (compare_type == vector_compare_match1i)
{
push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
}
push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
return;
}
push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));
push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));
push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
return;
}
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD …
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;
SLJIT_UNUSED_ARG(offset);
if (char1 != char2)
{
bit = char1 ^ char2;
compare_type = vector_compare_match1i;
if (!is_powerof2(bit))
{
bit = 0;
compare_type = vector_compare_match2;
}
}
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[0]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
if (char1 != char2)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
}
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]);
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
JUMPHERE(quit);
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
{
JUMPHERE(partial_quit[0]);
JUMPHERE(partial_quit[1]);
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
}
else
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
quit = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, restart);
JUMPHERE(quit);
}
#endif
}
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD …
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;
if (char1 != char2)
{
bit = char1 ^ char2;
compare_type = vector_compare_match1i;
if (!is_powerof2(bit))
{
bit = 0;
compare_type = vector_compare_match2;
}
}
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
if (char1 != char2)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
}
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
JUMPHERE(quit);
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
return not_found;
}
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD …
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
if (common->match_end_ptr != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
}
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
if (char1a == char1b)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
else
{
bit1 = char1a ^ char1b;
if (is_powerof2(bit1))
{
compare1_type = vector_compare_match1i;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
}
else
{
compare1_type = vector_compare_match2;
bit1 = 0;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
}
}
push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));
if (char1a != char1b)
{
push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
}
if (char2a == char2b)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
else
{
bit2 = char2a ^ char2b;
if (is_powerof2(bit2))
{
compare2_type = vector_compare_match1i;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
}
else
{
compare2_type = vector_compare_match2;
bit2 = 0;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
}
}
push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));
if (char2a != char2b)
{
push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
jump[1] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));
JUMPHERE(jump[1]);
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
push_inst(compiler, VOR_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
JUMPHERE(jump[0]);
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
jump[0] = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
JUMPHERE(jump[0]);
}
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
#endif
#endif