// chromium/v8/src/objects/string.h

// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_STRING_H_
#define V8_OBJECTS_STRING_H_

#include <memory>
#include <optional>

#include "src/base/bits.h"
#include "src/base/export-template.h"
#include "src/base/small-vector.h"
#include "src/base/strings.h"
#include "src/common/globals.h"
#include "src/heap/heap.h"
#include "src/objects/instance-type.h"
#include "src/objects/map.h"
#include "src/objects/name.h"
#include "src/objects/smi.h"
#include "src/objects/tagged.h"
#include "src/sandbox/external-pointer.h"
#include "src/strings/unicode-decoder.h"

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8::internal {

// Forward declarations of classes that live in the maglev namespace.
namespace maglev {
class BuiltinStringFromCharCode;
class CheckedInternalizedString;
}  // namespace maglev

// Forward declaration of LiftoffCompiler, which lives in wasm::baseline.
namespace wasm::baseline {
class LiftoffCompiler;
}  // namespace wasm::baseline

// Forward declaration only; this header refers to the class by const
// reference in the String::WriteToFlat declarations.
class SharedStringAccessGuardIfNeeded;

// Opaque declaration of InstanceType with a fixed uint16_t underlying type.
enum InstanceType : uint16_t;

// Flag enums.
// NOTE(review): neither definition lists any enumerators here — confirm that
// this is intended.
enum AllowNullsFlag {};
enum RobustnessFlag {};

// The characteristics of a string are stored in its map.  Retrieving these
// few bits of information is moderately expensive, involving two memory
// loads where the second is dependent on the first.  To improve efficiency
// the shape of the string is given its own class so that it can be retrieved
// once and used for several string operations.  A StringShape is small enough
// to be passed by value and is immutable, but be aware that flattening a
// string can potentially alter its shape.  Also be aware that a GC caused by
// something else can alter the shape of a string due to ConsString
// shortcutting.  Keeping these restrictions in mind has proven to be
// error-prone, and so we no longer put StringShapes in variables unless there
// is a concrete performance benefit at that particular point in the code.
class StringShape {};

// The String abstract class captures JavaScript string values:
//
// Ecma-262:
//  4.3.16 String Value
//    A string value is a member of the type String and is a finite
//    ordered sequence of zero or more 16-bit unsigned integer values.
//
// All string values have a length field.
//
// String derives from Name and is the direct base class of SeqString,
// InternalizedString, ConsString, ThinString, SlicedString and
// UncachedExternalString.
V8_OBJECT class String : public Name {} V8_OBJECT_END;

// ObjectTraits specialization for String.
template <>
struct ObjectTraits<String> {};

// Explicit instantiation declarations of String::WriteToFlat for uint8_t and
// uint16_t sinks, each with and without a trailing
// SharedStringAccessGuardIfNeeded argument.
// clang-format off
extern template EXPORT_TEMPLATE_DECLARE() void
    String::WriteToFlat(Tagged<String> source, uint8_t* sink, int from, int to);
extern template EXPORT_TEMPLATE_DECLARE() void
    String::WriteToFlat(Tagged<String> source, uint16_t* sink, int from,
                        int to);
extern template EXPORT_TEMPLATE_DECLARE() void
    String::WriteToFlat(Tagged<String> source, uint8_t* sink, int from, int to,
                        const SharedStringAccessGuardIfNeeded&);
extern template EXPORT_TEMPLATE_DECLARE() void
    String::WriteToFlat(Tagged<String> source, uint16_t* sink, int from, int to,
                        const SharedStringAccessGuardIfNeeded&);
// clang-format on

// NOTE(review): no members are visible here; presumably this represents a
// range over part of a string's characters — confirm against the definition.
class SubStringRange {};

// The SeqString abstract class captures sequential string values.
// It is the common base class of SeqOneByteString and SeqTwoByteString.
class SeqString : public String {};

// The InternalizedString class is a String subclass for internalized strings.
V8_OBJECT class InternalizedString : public String {} V8_OBJECT_END;

// The SeqOneByteString class captures sequential one-byte string objects.
// Each character in a SeqOneByteString is a one-byte character.
V8_OBJECT class SeqOneByteString : public SeqString {} V8_OBJECT_END;

// ObjectTraits specialization for SeqOneByteString.
template <>
struct ObjectTraits<SeqOneByteString> {};

// The SeqTwoByteString class captures sequential Unicode string objects.
// Each character in a SeqTwoByteString is a two-byte uint16_t.
V8_OBJECT class SeqTwoByteString : public SeqString {} V8_OBJECT_END;

// ObjectTraits specialization for SeqTwoByteString.
template <>
struct ObjectTraits<SeqTwoByteString> {};

// The ConsString class describes string values built by using the
// addition operator on strings.  A ConsString is a pair where the
// first and second components are pointers to other string values.
// One or both components of a ConsString can be pointers to other
// ConsStrings, creating a binary tree of ConsStrings whose leaves
// are non-ConsString string values.  The string value represented by
// a ConsString can be obtained by concatenating the leaf string
// values in a left-to-right, depth-first traversal of the tree.
V8_OBJECT class ConsString : public String {} V8_OBJECT_END;

// ObjectTraits specialization for ConsString.
template <>
struct ObjectTraits<ConsString> {};

// The ThinString class describes string objects that are just references
// to another string object. They are used for in-place internalization when
// the original string cannot actually be internalized in-place: in these
// cases, the original string is converted to a ThinString pointing at its
// internalized version (which is allocated as a new object).
// In terms of memory layout, and for most algorithms operating on strings,
// ThinStrings can be thought of as "one-part cons strings".
V8_OBJECT class ThinString : public String {} V8_OBJECT_END;

// ObjectTraits specialization for ThinString.
template <>
struct ObjectTraits<ThinString> {};

// The Sliced String class describes strings that are substrings of another
// sequential string.  The motivation is to save time and memory when creating
// a substring.  A Sliced String is described as a pointer to the parent,
// the offset from the start of the parent string and the length.  Using
// a Sliced String therefore requires unpacking of the parent string and
// adding the offset to the start address.  Substrings of a Sliced String
// are not nested, since the double indirection is simplified when creating
// such a substring.
// Currently missing features are:
//  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
V8_OBJECT class SlicedString : public String {} V8_OBJECT_END;

// ObjectTraits specialization for SlicedString.
template <>
struct ObjectTraits<SlicedString> {};

// Base class of ExternalString.
// TODO(leszeks): Build this out into a full V8 class.
V8_OBJECT class UncachedExternalString : public String {} V8_OBJECT_END;

// The ExternalString class describes string values that are backed by
// a string resource that lies outside the V8 heap.  ExternalStrings
// consist of the length field common to all strings and a pointer to the
// external resource.  It is important to ensure (externally) that the
// resource is not deallocated while the ExternalString is live in the
// V8 heap.
//
// The API expects that all ExternalStrings are created through the
// API.  Therefore, ExternalStrings should not be used internally.
//
// ExternalString derives from UncachedExternalString and is the base class
// of ExternalOneByteString and ExternalTwoByteString.
V8_OBJECT class ExternalString : public UncachedExternalString {} V8_OBJECT_END;

// ObjectTraits specialization for ExternalString.
template <>
struct ObjectTraits<ExternalString> {};

// The ExternalOneByteString class is an external string backed by a
// one-byte string.
V8_OBJECT class ExternalOneByteString : public ExternalString {} V8_OBJECT_END;

// NOTE(review): this static_assert has no condition and is not a valid
// declaration as written — the asserted expression needs to be restored.
static_assert;

// The ExternalTwoByteString class is an external string backed by a UTF-16
// encoded string.
V8_OBJECT class ExternalTwoByteString : public ExternalString {} V8_OBJECT_END;

// NOTE(review): this static_assert has no condition and is not a valid
// declaration as written — the asserted expression needs to be restored.
static_assert;

// A flat string reader provides random access to the contents of a
// string independent of the character width of the string. The handle
// must be valid as long as the reader is being used.
// Not safe to use from concurrent background threads.
// FlatStringReader derives from Relocatable.
class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {};

// This maintains an off-stack representation of the stack frames required
// to traverse a ConsString, allowing an entirely iterative and restartable
// traversal of the entire string.
class ConsStringIterator {};

// Forward declaration only; the class is not defined in this header.
class StringCharacterStream;

// Traits keyed on the character type. The primary template is only declared;
// specializations are provided for uint8_t and uint16_t.
template <typename Char>
struct CharTraits;

// CharTraits specialization for one-byte (uint8_t) characters.
template <>
struct CharTraits<uint8_t> {};

// CharTraits specialization for two-byte (uint16_t) characters.
template <>
struct CharTraits<uint16_t> {};

}  // namespace v8::internal

#include "src/objects/object-macros-undef.h"

#endif  // V8_OBJECTS_STRING_H_