#include "src/regexp/regexp.h"
#include <cstdlib>
#include <memory>
#include <sstream>
#include "include/v8-context.h"
#include "include/v8-initialization.h"
#include "include/v8-isolate.h"
#include "include/v8-local-handle.h"
#include "src/api/api-inl.h"
#include "src/ast/ast.h"
#include "src/base/strings.h"
#include "src/codegen/assembler-arch.h"
#include "src/codegen/macro-assembler.h"
#include "src/init/v8.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-parser.h"
#include "src/strings/char-predicates-inl.h"
#include "src/strings/string-stream.h"
#include "src/strings/unicode-inl.h"
#include "src/utils/ostreams.h"
#include "src/zone/zone-list-inl.h"
#include "test/common/flag-utils.h"
#include "test/unittests/test-utils.h"
namespace v8 {
namespace internal {
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToString) { … }
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToStringNoFlags) { … }
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToStringAllFlags) { … }
using RegExpTest = TestWithIsolate;
static bool CheckParse(const char* input) { … }
static void CheckParseEq(const char* input, const char* expected,
bool unicode = false) { … }
static bool CheckSimple(const char* input) { … }
struct MinMaxPair { … };
static MinMaxPair CheckMinMaxMatch(const char* input) { … }
#define CHECK_PARSE_ERROR …
#define CHECK_SIMPLE …
#define CHECK_MIN_MAX …
TEST_F(RegExpTest, RegExpParser) { … }
TEST_F(RegExpTest, ParserRegression) { … }
static void ExpectError(const char* input, const char* expected,
bool unicode = false) { … }
TEST_F(RegExpTest, Errors) { … }
static bool IsDigit(base::uc32 c) { … }
static bool NotDigit(base::uc32 c) { … }
static bool NotWhiteSpaceNorLineTermiantor(base::uc32 c) { … }
static bool NotWord(base::uc32 c) { … }
static bool NotLineTerminator(base::uc32 c) { … }
static void TestCharacterClassEscapes(StandardCharacterSet c,
bool(pred)(base::uc32 c)) { … }
TEST_F(RegExpTest, CharacterClassEscapes) { … }
static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
bool is_one_byte, Zone* zone) { … }
static void Execute(const char* input, bool multiline, bool unicode,
bool is_one_byte, bool dot_output = false) { … }
#ifdef DEBUG
TEST_F(RegExpTest, ParsePossessiveRepetition) { … }
#endif
// Picks the native regexp macro assembler that matches the architecture this
// build targets and exposes it under the single name ArchRegExpMacroAssembler,
// so the code below can refer to it (e.g. ArchRegExpMacroAssembler::Result)
// without repeating the per-architecture dispatch. Exactly one branch is
// active per build; every branch must be a complete alias declaration.
#if V8_TARGET_ARCH_IA32
using ArchRegExpMacroAssembler = RegExpMacroAssemblerIA32;
#elif V8_TARGET_ARCH_X64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX64;
#elif V8_TARGET_ARCH_ARM
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM;
#elif V8_TARGET_ARCH_ARM64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM64;
#elif V8_TARGET_ARCH_S390
using ArchRegExpMacroAssembler = RegExpMacroAssemblerS390;
#elif V8_TARGET_ARCH_PPC64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerPPC;
#elif V8_TARGET_ARCH_MIPS64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_LOONG64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerLOONG64;
#elif V8_TARGET_ARCH_RISCV64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerRISCV;
#elif V8_TARGET_ARCH_RISCV32
using ArchRegExpMacroAssembler = RegExpMacroAssemblerRISCV;
#endif
class ContextInitializer { … };
static Handle<JSRegExp> CreateJSRegExp(DirectHandle<String> source,
DirectHandle<Code> code,
bool is_unicode = false) { … }
static ArchRegExpMacroAssembler::Result Execute(
Tagged<JSRegExp> regexp, Tagged<String> input, int start_offset,
Address input_start, Address input_end, int* captures) { … }
TEST_F(RegExpTest, MacroAssemblerNativeSuccess) { … }
TEST_F(RegExpTest, MacroAssemblerNativeSimple) { … }
TEST_F(RegExpTest, MacroAssemblerNativeSimpleUC16) { … }
TEST_F(RegExpTest, MacroAssemblerNativeBacktrack) { … }
TEST_F(RegExpTest, MacroAssemblerNativeBackReferenceLATIN1) { … }
TEST_F(RegExpTest, MacroAssemblerNativeBackReferenceUC16) { … }
TEST_F(RegExpTest, MacroAssemblernativeAtStart) { … }
TEST_F(RegExpTest, MacroAssemblerNativeBackRefNoCase) { … }
TEST_F(RegExpTest, MacroAssemblerNativeRegisters) { … }
TEST_F(RegExpTest, MacroAssemblerStackOverflow) { … }
TEST_F(RegExpTest, MacroAssemblerNativeLotsOfRegisters) { … }
TEST_F(RegExpTest, MacroAssembler) { … }
#ifndef V8_INTL_SUPPORT
// Runs |c| through unibrow::Ecma262Canonicalize::Convert and returns the
// result. When the conversion reports zero output characters, |c| itself is
// returned; otherwise the conversion must have produced exactly one character
// (enforced with CHECK_EQ), and that character is returned.
static base::uc32 canonicalize(base::uc32 c) {
  unibrow::uchar mapped[unibrow::Ecma262Canonicalize::kMaxWidth];
  const int mapped_count =
      unibrow::Ecma262Canonicalize::Convert(c, '\0', mapped, nullptr);
  if (mapped_count == 0) return c;
  CHECK_EQ(1, mapped_count);
  return mapped[0];
}
// Exercises canonicalize() together with the Ecma262UnCanonicalize and
// ToUppercase mappings in three passes:
//   1. For every letter 'a'..'z', the lower- and upper-case forms canonicalize
//      to the same value, and un-canonicalizing the lower-case letter yields
//      exactly {upper, lower} in that order.
//   2. No code point in [128, 1 << 21) canonicalizes to a value below 128.
//   3. For every code point in [0, 1 << 16), canonicalize() agrees with a
//      reference value derived from the ToUppercase mapping.
TEST_F(RegExpTest, LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;

  // Pass 1: the basic Latin letters.
  for (unibrow::uchar lower_ch = 'a'; lower_ch <= 'z'; lower_ch++) {
    const unibrow::uchar upper_ch = lower_ch + ('A' - 'a');
    CHECK_EQ(canonicalize(lower_ch), canonicalize(upper_ch));

    unibrow::uchar variants[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    const int variant_count = un_canonicalize.get(lower_ch, '\0', variants);
    CHECK_EQ(2, variant_count);
    CHECK_EQ(upper_ch, variants[0]);
    CHECK_EQ(lower_ch, variants[1]);
  }

  // Pass 2: values at or above 128 never canonicalize to something below 128.
  for (base::uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  // Pass 3: compare against a reference built from the upper-case mapping.
  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  for (base::uc32 c = 0; c < (1 << 16); c++) {
    unibrow::uchar upper_chars[unibrow::ToUppercase::kMaxWidth];
    const int upper_count = to_upper.get(c, '\0', upper_chars);

    // A count of zero is treated as "the character maps to itself".
    base::uc32 expected = (upper_count == 0) ? c : upper_chars[0];

    // The reference falls back to the character itself when the upper-case
    // mapping has more than one character, or when it would take a value
    // >= 128 to a value < 128.
    const bool multi_char_mapping = upper_count > 1;
    const bool drops_below_128 = c >= 128 && expected < 128;
    if (multi_char_mapping || drops_below_128) expected = c;

    CHECK_EQ(expected, canonicalize(c));
  }
}
// Runs |c| through unibrow::CanonicalizationRange::Convert and returns the
// result. When the conversion reports zero output characters, |c| itself is
// returned; otherwise exactly one output character is required (enforced with
// CHECK_EQ) and returned. RangeCanonicalization uses the returned value as the
// last element of the block that begins at |c|.
static base::uc32 CanonRangeEnd(base::uc32 c) {
  unibrow::uchar range_out[unibrow::CanonicalizationRange::kMaxWidth];
  const int out_count =
      unibrow::CanonicalizationRange::Convert(c, '\0', range_out, nullptr);
  if (out_count == 0) return c;
  CHECK_EQ(1, out_count);
  return range_out[0];
}
// Walks the values 0..0xFFFF block by block, where each block runs from the
// current position to CanonRangeEnd(position). Inside a block of more than one
// element, every element's Ecma262UnCanonicalize result must have the same
// length as the first element's result, and each entry must equal the first
// element's corresponding entry plus the element's offset within the block.
TEST_F(RegExpTest, RangeCanonicalization) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  int cursor = 0;
  while (cursor <= 0xFFFF) {
    const int block_first = cursor;
    const base::uc32 block_last = CanonRangeEnd(block_first);
    const unsigned span = block_last - block_first + 1;

    // Advance to the next block up front; the checks below only use
    // |block_first| and |span|.
    cursor = block_first + span;

    // Single-element blocks have nothing to compare against.
    if (span <= 1) continue;

    unibrow::uchar head[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    const int head_count = un_canonicalize.get(block_first, '\0', head);

    for (unsigned offset = 1; offset < span; offset++) {
      unibrow::uchar member[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int member_count =
          un_canonicalize.get(block_first + offset, '\0', member);
      CHECK_EQ(head_count, member_count);

      for (int idx = 0; idx < member_count; idx++) {
        // Each entry is expected to be the head's entry shifted by |offset|.
        const int predicted = head[idx] + offset;
        const int actual = member[idx];
        CHECK_EQ(predicted, actual);
      }
    }
  }
}
// For every value in [0, 1 << 16), takes its Ecma262UnCanonicalize result and
// checks that un-canonicalizing each character of that result yields the very
// same list again: same length and same characters in the same positions.
TEST_F(RegExpTest, UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar group[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  for (int code = 0; code < (1 << 16); code++) {
    const int group_size = un_canonicalize.get(code, '\0', group);
    for (int member = 0; member < group_size; member++) {
      unibrow::uchar peer_group[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int peer_size =
          un_canonicalize.get(group[member], '\0', peer_group);
      CHECK_EQ(group_size, peer_size);
      for (int pos = 0; pos < group_size; pos++) {
        CHECK_EQ(static_cast<int>(group[pos]),
                 static_cast<int>(peer_group[pos]));
      }
    }
  }
}
#endif
static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
base::Vector<CharacterRange> expected) { … }
static void TestSimpleRangeCaseIndependence(Isolate* isolate,
CharacterRange input,
CharacterRange expected) { … }
TEST_F(RegExpTest, CharacterRangeCaseIndependence) { … }
static bool InClass(base::uc32 c,
const UnicodeRangeSplitter::CharacterRangeVector* ranges) { … }
TEST_F(RegExpTest, UnicodeRangeSplitter) { … }
TEST_F(RegExpTest, CanonicalizeCharacterSets) { … }
TEST_F(RegExpTest, CharacterRangeMerge) { … }
TEST_F(RegExpTest, Graph) { … }
namespace {
int* global_use_counts = …;
void MockUseCounterCallback(v8::Isolate* isolate,
v8::Isolate::UseCounterFeature feature) { … }
}
using RegExpTestWithContext = TestWithContext;
TEST_F(RegExpTestWithContext, UseCountRegExp) { … }
class UncachedExternalStringResource
: public v8::String::ExternalOneByteStringResource { … };
TEST_F(RegExpTestWithContext, UncachedExternalString) { … }
void CreatePeepholeNoChangeBytecode(RegExpMacroAssembler* m) { … }
TEST_F(RegExpTest, PeepholeNoChange) { … }
void CreatePeepholeSkipUntilCharBytecode(RegExpMacroAssembler* m) { … }
TEST_F(RegExpTest, PeepholeSkipUntilChar) { … }
void CreatePeepholeSkipUntilBitInTableBytecode(RegExpMacroAssembler* m,
Factory* factory) { … }
TEST_F(RegExpTest, PeepholeSkipUntilBitInTable) { … }
void CreatePeepholeSkipUntilCharPosCheckedBytecode(RegExpMacroAssembler* m) { … }
TEST_F(RegExpTest, PeepholeSkipUntilCharPosChecked) { … }
void CreatePeepholeSkipUntilCharAndBytecode(RegExpMacroAssembler* m) { … }
TEST_F(RegExpTest, PeepholeSkipUntilCharAnd) { … }
void CreatePeepholeSkipUntilCharOrCharBytecode(RegExpMacroAssembler* m) { … }
TEST_F(RegExpTest, PeepholeSkipUntilCharOrChar) { … }
void CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(RegExpMacroAssembler* m,
Factory* factory) { … }
TEST_F(RegExpTest, PeepholeSkipUntilGtOrNotBitInTable) { … }
void CreatePeepholeLabelFixupsInsideBytecode(RegExpMacroAssembler* m,
Label* dummy_before,
Label* dummy_after,
Label* dummy_inside) { … }
TEST_F(RegExpTest, PeepholeLabelFixupsInside) { … }
void CreatePeepholeLabelFixupsComplexBytecode(RegExpMacroAssembler* m,
Label* dummy_before,
Label* dummy_between,
Label* dummy_after,
Label* dummy_inside) { … }
TEST_F(RegExpTest, PeepholeLabelFixupsComplex) { … }
TEST_F(RegExpTestWithContext, UnicodePropertyEscapeCodeSize) { … }
namespace {
struct RegExpExecData { … };
i::Handle<i::Object> RegExpExec(const RegExpExecData* d) { … }
void ReenterRegExp(v8::Isolate* isolate, void* data) { … }
}
TEST_F(RegExpTestWithContext, RegExpInterruptReentrantExecution) { … }
#undef CHECK_PARSE_ERROR
#undef CHECK_SIMPLE
#undef CHECK_MIN_MAX
}
}