#include "src/common/globals.h"
#include "src/execution/isolate.h"
#include "src/objects/string.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp.h"
#include "src/strings/unicode-inl.h"
#include "src/zone/zone-list-inl.h"
#ifdef V8_INTL_SUPPORT
#include "src/base/strings.h"
#include "src/regexp/special-case.h"
#include "unicode/locid.h"
#include "unicode/uniset.h"
#include "unicode/utypes.h"
#endif
namespace v8 {
namespace internal {
usingnamespaceregexp_compiler_constants;
constexpr base::uc32 kMaxCodePoint = …;
constexpr int kMaxUtf16CodeUnit = …;
constexpr uint32_t kMaxUtf16CodeUnitU = …;
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
namespace {
bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
const int* special_class, int length) { … }
bool CompareRanges(ZoneList<CharacterRange>* ranges, const int* special_class,
int length) { … }
}
bool RegExpClassRanges::is_standard(Zone* zone) { … }
UnicodeRangeSplitter::UnicodeRangeSplitter(ZoneList<CharacterRange>* base) { … }
void UnicodeRangeSplitter::AddRange(CharacterRange range) { … }
namespace {
ZoneList<CharacterRange>* ToCanonicalZoneList(
const UnicodeRangeSplitter::CharacterRangeVector* v, Zone* zone) { … }
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success, UnicodeRangeSplitter* splitter) { … }
UC16Range;
constexpr UC16Range ToUC16Range(base::uc16 from, base::uc16 to) { … }
constexpr base::uc16 ExtractFrom(UC16Range r) { … }
constexpr base::uc16 ExtractTo(UC16Range r) { … }
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) { … }
RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
ZoneList<CharacterRange>* match, RegExpNode* on_success,
bool read_backward) { … }
RegExpNode* MatchAndNegativeLookaroundInReadDirection(
RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
bool read_backward) { … }
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) { … }
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) { … }
RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
}
void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
Zone* zone) { … }
RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpClassSetExpression::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) { … }
void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
ZoneList<CharacterRange>* temp_ranges,
Zone* zone) { … }
void RegExpClassSetOperand::Subtract(RegExpClassSetOperand* other,
ZoneList<CharacterRange>* temp_ranges,
Zone* zone) { … }
RegExpClassSetOperand* RegExpClassSetExpression::ComputeExpression(
RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone) { … }
namespace {
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { … }
#ifdef V8_INTL_SUPPORT
int CompareCaseInsensitive(const icu::UnicodeString& a,
const icu::UnicodeString& b) { … }
int CompareFirstCharCaseInsensitive(RegExpTree* const* a,
RegExpTree* const* b) { … }
bool Equals(bool ignore_case, const icu::UnicodeString& a,
const icu::UnicodeString& b) { … }
bool CharAtEquals(bool ignore_case, int index, const RegExpAtom* a,
const RegExpAtom* b) { … }
#else
// Looks up `c` in the supplied Ecma262Canonicalize mapping. When the mapping
// yields exactly one character, that character is returned; otherwise `c` is
// returned unchanged. The mapping is asserted (debug-only) to yield at most
// one character.
unibrow::uchar Canonical(
    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
    unibrow::uchar c) {
  unibrow::uchar mapped[unibrow::Ecma262Canonicalize::kMaxWidth];
  const int length = canonicalize->get(c, '\0', mapped);
  DCHECK_LE(length, 1);
  return length == 1 ? mapped[0] : c;
}
// Three-way comparison of two characters that ignores case: returns 0 when
// they are considered equal, otherwise the signed difference of the (possibly
// canonicalized) values.
int CompareCaseInsensitive(
    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
    unibrow::uchar a, unibrow::uchar b) {
  if (a == b) return 0;
  // Canonicalization is skipped only when both characters sort below 'a'.
  // NOTE(review): presumably such characters canonicalize to themselves, so
  // this is just a shortcut -- confirm against the mapping tables.
  const bool both_below_a = a < 'a' && b < 'a';
  const unibrow::uchar lhs = both_below_a ? a : Canonical(canonicalize, a);
  const unibrow::uchar rhs = both_below_a ? b : Canonical(canonicalize, b);
  return static_cast<int>(lhs) - static_cast<int>(rhs);
}
// Comparator over two RegExpTree entries that are treated as atoms: orders
// them by a case-insensitive comparison of the first code unit of each atom's
// data.
int CompareFirstCharCaseInsensitive(
    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
    RegExpTree* const* a, RegExpTree* const* b) {
  const unibrow::uchar first_of_a = (*a)->AsAtom()->data().at(0);
  const unibrow::uchar first_of_b = (*b)->AsAtom()->data().at(0);
  return CompareCaseInsensitive(canonicalize, first_of_a, first_of_b);
}
// Returns true when `a` and `b` are identical, or -- only if `ignore_case` is
// set -- when they compare equal under CompareCaseInsensitive.
bool Equals(bool ignore_case,
            unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
            unibrow::uchar a, unibrow::uchar b) {
  if (a == b) return true;
  // Short-circuit keeps the case-insensitive comparison off the
  // case-sensitive path.
  return ignore_case && CompareCaseInsensitive(canonicalize, a, b) == 0;
}
bool CharAtEquals(bool ignore_case,
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
int index, const RegExpAtom* a, const RegExpAtom* b) {
return Equals(ignore_case, canonicalize, a->data().at(index),
b->data().at(index));
}
#endif
}
bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { … }
void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { … }
void RegExpDisjunction::FixSingleCharacterDisjunctions(
RegExpCompiler* compiler) { … }
RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
namespace {
RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpAssertion::Type type) { … }
}
RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
namespace {
class V8_NODISCARD ModifiersScope { … };
}
RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
int stack_pointer_register,
int position_register,
int capture_register_count,
int capture_register_start)
: … { … }
RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) { … }
RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index,
RegExpCompiler* compiler,
RegExpNode* on_success) { … }
namespace {
class AssertionSequenceRewriter final { … };
}
RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) { … }
namespace {
void AddClass(const int* elmv, int elmc, ZoneList<CharacterRange>* ranges,
Zone* zone) { … }
void AddClassNegated(const int* elmv, int elmc,
ZoneList<CharacterRange>* ranges, Zone* zone) { … }
}
void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents,
Zone* zone) { … }
void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
ZoneList<CharacterRange>* ranges,
bool is_one_byte) { … }
bool CharacterRange::IsCanonical(const ZoneList<CharacterRange>* ranges) { … }
ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) { … }
namespace {
void MoveRanges(ZoneList<CharacterRange>* list, int from, int to, int count) { … }
int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
CharacterRange insert) { … }
}
void CharacterSet::Canonicalize() { … }
void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { … }
void CharacterRange::Negate(const ZoneList<CharacterRange>* ranges,
ZoneList<CharacterRange>* negated_ranges,
Zone* zone) { … }
void CharacterRange::Intersect(const ZoneList<CharacterRange>* lhs,
const ZoneList<CharacterRange>* rhs,
ZoneList<CharacterRange>* intersection,
Zone* zone) { … }
namespace {
void SafeAdvanceRange(const ZoneList<CharacterRange>* range, int* index,
base::uc32* from, base::uc32* to) { … }
}
void CharacterRange::Subtract(const ZoneList<CharacterRange>* src,
const ZoneList<CharacterRange>* to_remove,
ZoneList<CharacterRange>* result, Zone* zone) { … }
void CharacterRange::ClampToOneByte(ZoneList<CharacterRange>* ranges) { … }
bool CharacterRange::Equals(const ZoneList<CharacterRange>* lhs,
const ZoneList<CharacterRange>* rhs) { … }
namespace {
class RegExpExpansionLimiter { … };
}
RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
RegExpTree* body, RegExpCompiler* compiler,
RegExpNode* on_success,
bool not_at_start) { … }
}
}