// Copyright 2017 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_OBJECTS_STRING_H_ #define V8_OBJECTS_STRING_H_ #include <memory> #include <optional> #include "src/base/bits.h" #include "src/base/export-template.h" #include "src/base/small-vector.h" #include "src/base/strings.h" #include "src/common/globals.h" #include "src/heap/heap.h" #include "src/objects/instance-type.h" #include "src/objects/map.h" #include "src/objects/name.h" #include "src/objects/smi.h" #include "src/objects/tagged.h" #include "src/sandbox/external-pointer.h" #include "src/strings/unicode-decoder.h" // Has to be the last include (doesn't have include guards): #include "src/objects/object-macros.h" namespace v8::internal { namespace maglev { class CheckedInternalizedString; class BuiltinStringFromCharCode; } // namespace maglev namespace wasm { namespace baseline { class LiftoffCompiler; } // namespace baseline } // namespace wasm class SharedStringAccessGuardIfNeeded; enum InstanceType : uint16_t; enum AllowNullsFlag { … }; enum RobustnessFlag { … }; // The characteristics of a string are stored in its map. Retrieving these // few bits of information is moderately expensive, involving two memory // loads where the second is dependent on the first. To improve efficiency // the shape of the string is given its own class so that it can be retrieved // once and used for several string operations. A StringShape is small enough // to be passed by value and is immutable, but be aware that flattening a // string can potentially alter its shape. Also be aware that a GC caused by // something else can alter the shape of a string due to ConsString // shortcutting. Keeping these restrictions in mind has proven to be error- // prone and so we no longer put StringShapes in variables unless there is a // concrete performance benefit at that particular point in the code. 
class StringShape { … }; // The String abstract class captures JavaScript string values: // // Ecma-262: // 4.3.16 String Value // A string value is a member of the type String and is a finite // ordered sequence of zero or more 16-bit unsigned integer values. // // All string values have a length field. V8_OBJECT class String : public Name { … } V8_OBJECT_END; template <> struct ObjectTraits<String> { … }; // clang-format off extern template EXPORT_TEMPLATE_DECLARE(…) void String::WriteToFlat(Tagged<String> source, uint8_t* sink, int from, int to); extern template EXPORT_TEMPLATE_DECLARE(…) void String::WriteToFlat(Tagged<String> source, uint16_t* sink, int from, int to); extern template EXPORT_TEMPLATE_DECLARE(…) void String::WriteToFlat(Tagged<String> source, uint8_t* sink, int from, int to, const SharedStringAccessGuardIfNeeded&); extern template EXPORT_TEMPLATE_DECLARE(…) void String::WriteToFlat(Tagged<String> source, uint16_t* sink, int from, int to, const SharedStringAccessGuardIfNeeded&); // clang-format on class SubStringRange { … }; // The SeqString abstract class captures sequential string values. class SeqString : public String { … }; V8_OBJECT class InternalizedString : public String { … } V8_OBJECT_END; // The SeqOneByteString class captures sequential one-byte string objects. // Each character in the SeqOneByteString is a one-byte character. V8_OBJECT class SeqOneByteString : public SeqString { … } V8_OBJECT_END; template <> struct ObjectTraits<SeqOneByteString> { … }; // The SeqTwoByteString class captures sequential unicode string objects. // Each character in the SeqTwoByteString is a two-byte uint16_t. V8_OBJECT class SeqTwoByteString : public SeqString { … } V8_OBJECT_END; template <> struct ObjectTraits<SeqTwoByteString> { … }; // The ConsString class describes string values built by using the // addition operator on strings. A ConsString is a pair where the // first and second components are pointers to other string values. 
// One or both components of a ConsString can be pointers to other // ConsStrings, creating a binary tree of ConsStrings where the leaves // are non-ConsString string values. The string value represented by // a ConsString can be obtained by concatenating the leaf string // values in a left-to-right depth-first traversal of the tree. V8_OBJECT class ConsString : public String { … } V8_OBJECT_END; template <> struct ObjectTraits<ConsString> { … }; // The ThinString class describes string objects that are just references // to another string object. They are used for in-place internalization when // the original string cannot actually be internalized in-place: in these // cases, the original string is converted to a ThinString pointing at its // internalized version (which is allocated as a new object). // In terms of memory layout and most algorithms operating on strings, // ThinStrings can be thought of as "one-part cons strings". V8_OBJECT class ThinString : public String { … } V8_OBJECT_END; template <> struct ObjectTraits<ThinString> { … }; // The Sliced String class describes strings that are substrings of another // sequential string. The motivation is to save time and memory when creating // a substring. A Sliced String is described as a pointer to the parent, // the offset from the start of the parent string and the length. Using // a Sliced String therefore requires unpacking of the parent string and // adding the offset to the start address. Substrings of a Sliced String // are not nested since the double indirection is simplified when creating // such a substring. // Currently missing features are: // - truncating sliced string to enable otherwise unneeded parent to be GC'ed. V8_OBJECT class SlicedString : public String { … } V8_OBJECT_END; template <> struct ObjectTraits<SlicedString> { … }; // TODO(leszeks): Build this out into a full V8 class. 
V8_OBJECT class UncachedExternalString : public String { … } V8_OBJECT_END; // The ExternalString class describes string values that are backed by // a string resource that lies outside the V8 heap. ExternalStrings // consist of the length field common to all strings, and a pointer to the // external resource. It is important to ensure (externally) that the // resource is not deallocated while the ExternalString is live in the // V8 heap. // // The API expects that all ExternalStrings are created through the // API. Therefore, ExternalStrings should not be used internally. V8_OBJECT class ExternalString : public UncachedExternalString { … } V8_OBJECT_END; template <> struct ObjectTraits<ExternalString> { … }; // The ExternalOneByteString class is an external string backed by a // one-byte string. V8_OBJECT class ExternalOneByteString : public ExternalString { … } V8_OBJECT_END; static_assert …; // The ExternalTwoByteString class is an external string backed by a UTF-16 // encoded string. V8_OBJECT class ExternalTwoByteString : public ExternalString { … } V8_OBJECT_END; static_assert …; // A flat string reader provides random access to the contents of a // string independent of the character width of the string. The handle // must be valid as long as the reader is being used. // Not safe to use from concurrent background threads. class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable { … }; // This maintains an off-stack representation of the stack frames required // to traverse a ConsString, allowing an entirely iterative and restartable // traversal of the entire string. class ConsStringIterator { … }; class StringCharacterStream; template <typename Char> struct CharTraits; template <> struct CharTraits<uint8_t> { … }; template <> struct CharTraits<uint16_t> { … }; } // namespace v8::internal #include "src/objects/object-macros-undef.h" #endif // V8_OBJECTS_STRING_H_