chromium/v8/src/regexp/regexp-interpreter.cc

// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A simple interpreter for the Irregexp byte code.

#include "src/regexp/regexp-interpreter.h"

#include "src/base/small-vector.h"
#include "src/base/strings.h"
#include "src/execution/isolate.h"
#include "src/logging/counters.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/string-inl.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"  // For kMaximumStackSize.
#include "src/regexp/regexp.h"
#include "src/strings/unicode.h"
#include "src/utils/memcopy.h"
#include "src/utils/utils.h"

#ifdef V8_INTL_SUPPORT
#include "unicode/uchar.h"
#endif  // V8_INTL_SUPPORT

// Use token threaded dispatch iff the compiler supports computed gotos and the
// build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
#if V8_HAS_COMPUTED_GOTO && \
    defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
#define V8_USE_COMPUTED_GOTO
#endif  // V8_HAS_COMPUTED_GOTO

namespace v8 {
namespace internal {

namespace {

bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
                          base::Vector<const base::uc16> subject,
                          bool unicode) {}

bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
                          base::Vector<const uint8_t> subject, bool unicode) {}

#ifdef DEBUG
void MaybeTraceInterpreter(const uint8_t* code_base, const uint8_t* pc,
                           int stack_depth, int current_position,
                           uint32_t current_char, int bytecode_length,
                           const char* bytecode_name) {}
#endif  // DEBUG

int32_t Load32Aligned(const uint8_t* pc) {}

uint32_t Load16AlignedUnsigned(const uint8_t* pc) {}

int32_t Load16AlignedSigned(const uint8_t* pc) {}

// Helpers to access the packed argument. Takes the 32 bits containing the
// current bytecode, where the 8 LSB contain the bytecode and the rest contains
// a packed 24-bit argument.
// TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
// police restrictions during bytecode generation.
int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {}
uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {}

// A simple abstraction over the backtracking stack used by the interpreter.
//
// Despite the name 'backtracking' stack, it's actually used as a generic stack
// that stores both program counters (= offsets into the bytecode) and generic
// integer values.
class BacktrackStack {};

// Registers used during interpreter execution. These consist of output
// registers in indices [0, output_register_count[ which will contain matcher
// results as a {start,end} index tuple for each capture (where the whole match
// counts as implicit capture 0); and internal registers in indices
// [output_register_count, total_register_count[.
class InterpreterRegisters {};

IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
                                               RegExp::CallOrigin call_origin) {}

// Only throws if called from the runtime, otherwise just returns the EXCEPTION
// status code.
IrregexpInterpreter::Result MaybeThrowStackOverflow(
    Isolate* isolate, RegExp::CallOrigin call_origin) {}

template <typename Char>
void UpdateCodeAndSubjectReferences(
    Isolate* isolate, DirectHandle<TrustedByteArray> code_array,
    DirectHandle<String> subject_string,
    Tagged<TrustedByteArray>* code_array_out, const uint8_t** code_base_out,
    const uint8_t** pc_out, Tagged<String>* subject_string_out,
    base::Vector<const Char>* subject_string_vector_out) {}

// Runs all pending interrupts and updates unhandlified object references if
// necessary.
template <typename Char>
IrregexpInterpreter::Result HandleInterrupts(
    Isolate* isolate, RegExp::CallOrigin call_origin,
    Tagged<TrustedByteArray>* code_array_out,
    Tagged<String>* subject_string_out, const uint8_t** code_base_out,
    base::Vector<const Char>* subject_string_vector_out,
    const uint8_t** pc_out) {}

bool CheckBitInTable(const uint32_t current_char, const uint8_t* const table) {}

// Returns true iff 0 <= index < length.
bool IndexIsInBounds(int index, int length) {}

// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
// next handler we simply jump (goto) directly to its address.
#if V8_USE_COMPUTED_GOTO
#define BC_LABEL
#define DECODE
#define DISPATCH
// Without computed goto support, we fall back to a simple switch-based
// dispatch (A large switch statement inside a loop with a case for every
// bytecode).
#else  // V8_USE_COMPUTED_GOTO
#define BC_LABEL
#define DECODE
#define DISPATCH
#endif  // V8_USE_COMPUTED_GOTO

// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
// We want those two macros as far apart as possible, because the goto in
// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
// don't hit the cache and have to fetch the next handler address from physical
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
// potentially be executed unconditionally, reducing memory stall.
#define ADVANCE
#define SET_PC_FROM_OFFSET

// Current position mutations.
#define SET_CURRENT_POSITION
#define ADVANCE_CURRENT_POSITION

#ifdef DEBUG
#define BYTECODE
#else
#define BYTECODE
#endif  // DEBUG

template <typename Char>
IrregexpInterpreter::Result RawMatch(
    Isolate* isolate, Tagged<TrustedByteArray> code_array,
    Tagged<String> subject_string, base::Vector<const Char> subject,
    int* output_registers, int output_register_count, int total_register_count,
    int current, uint32_t current_char, RegExp::CallOrigin call_origin,
    const uint32_t backtrack_limit) {}

#undef BYTECODE
#undef ADVANCE_CURRENT_POSITION
#undef SET_CURRENT_POSITION
#undef DISPATCH
#undef DECODE
#undef SET_PC_FROM_OFFSET
#undef ADVANCE
#undef BC_LABEL
#undef V8_USE_COMPUTED_GOTO

}  // namespace

// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
    Isolate* isolate, Tagged<IrRegExpData> regexp_data,
    Tagged<String> subject_string, int* output_registers,
    int output_register_count, int start_position,
    RegExp::CallOrigin call_origin) {}

IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
    Isolate* isolate, Tagged<TrustedByteArray> code_array,
    Tagged<String> subject_string, int* output_registers,
    int output_register_count, int total_register_count, int start_position,
    RegExp::CallOrigin call_origin, uint32_t backtrack_limit) {}

#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER

// This method is called through an external reference from RegExpExecInternal
// builtin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
    Address subject, int32_t start_position, Address, Address,
    int* output_registers, int32_t output_register_count,
    RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp_data) {}

#endif  // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER

IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
    Isolate* isolate, DirectHandle<IrRegExpData> regexp_data,
    DirectHandle<String> subject_string, int* output_registers,
    int output_register_count, int start_position) {}

}  // namespace internal
}  // namespace v8