chromium/v8/src/objects/intl-objects.cc

// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/objects/intl-objects.h"

#include <algorithm>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "src/api/api-inl.h"
#include "src/base/logging.h"
#include "src/base/strings.h"
#include "src/common/globals.h"
#include "src/date/date.h"
#include "src/execution/isolate.h"
#include "src/execution/local-isolate.h"
#include "src/handles/global-handles.h"
#include "src/heap/factory.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-date-time-format-inl.h"
#include "src/objects/js-locale-inl.h"
#include "src/objects/js-locale.h"
#include "src/objects/js-number-format-inl.h"
#include "src/objects/js-temporal-objects.h"
#include "src/objects/managed-inl.h"
#include "src/objects/objects-inl.h"
#include "src/objects/option-utils.h"
#include "src/objects/property-descriptor.h"
#include "src/objects/smi.h"
#include "src/objects/string.h"
#include "src/strings/string-case.h"
#include "unicode/basictz.h"
#include "unicode/brkiter.h"
#include "unicode/calendar.h"
#include "unicode/coll.h"
#include "unicode/datefmt.h"
#include "unicode/decimfmt.h"
#include "unicode/formattedvalue.h"
#include "unicode/localebuilder.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "unicode/normalizer2.h"
#include "unicode/numberformatter.h"
#include "unicode/numfmt.h"
#include "unicode/numsys.h"
#include "unicode/timezone.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/uvernum.h"  // U_ICU_VERSION_MAJOR_NUM

// This file requires Intl support; fail the build early if V8_INTL_SUPPORT is
// not defined.
#ifndef V8_INTL_SUPPORT
#error Internationalization is expected to be enabled.
#endif  // V8_INTL_SUPPORT

// NOTE(review): the XSTR/STR macro bodies and the static_assert condition are
// not visible in this view. Given the uvernum.h include above
// (U_ICU_VERSION_MAJOR_NUM), this is presumably a compile-time ICU version
// check -- confirm against the full file.
#define XSTR
#define STR
static_assert;
#undef STR
#undef XSTR

namespace v8::internal {

// File-local helpers for case conversion and object construction.
// NOTE(review): all bodies/initializers in this namespace are elided in this
// view; behavior notes below are inferred from names and signatures and must
// be confirmed against the implementation.
namespace {

// Narrows a 16-bit code unit to a single byte.
// NOTE(review): presumably only valid for values that fit in 8 bits -- confirm.
inline constexpr uint8_t AsOneByte(uint16_t ch) {}

// 256-entry byte lookup table.
// NOTE(review): initializer not visible; judging by the name, presumably maps
// each byte value to its lower-case counterpart -- confirm.
constexpr uint8_t kToLower[256] =;

// NOTE(review): presumably lower-cases a single Latin-1 byte (likely via
// kToLower) -- confirm.
inline constexpr uint8_t ToLatin1Lower(uint8_t ch) {}
// Ensure callers explicitly truncate uint16_t.
inline constexpr uint8_t ToLatin1Lower(uint16_t ch) = delete;

// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF, or two-byte
// values.
inline constexpr uint8_t ToLatin1Upper(uint8_t ch) {}
// Ensure callers explicitly truncate uint16_t.
inline constexpr uint8_t ToLatin1Upper(uint16_t ch) = delete;

// Takes two-byte input `src` and a one-byte destination string `result`.
// NOTE(review): the bool presumably reports whether the ASCII-only fast path
// handled the whole input -- confirm.
bool ToUpperFastASCII(base::Vector<const uint16_t> src,
                      DirectHandle<SeqOneByteString> result) {}

// NOTE(review): initializer not visible; per the ToLatin1Upper comment above
// this is presumably U+00DF (sharp-s) -- confirm.
const uint16_t sharp_s =;

// Takes input `src`, a raw one-byte output buffer `dest`, and an out-parameter
// `sharp_s_count`.
// NOTE(review): presumably upper-cases into `dest`, counts sharp-s occurrences
// in `*sharp_s_count`, and returns false when the input cannot be handled in
// one-byte form -- confirm.
template <typename Char>
bool ToUpperOneByte(base::Vector<const Char> src, uint8_t* dest,
                    int* sharp_s_count) {}

// NOTE(review): presumably the upper-casing path used when `src` contains
// sharp-s characters that need special treatment; how `result` must be sized
// is not visible here -- confirm.
template <typename Char>
void ToUpperWithSharpS(base::Vector<const Char> src,
                       DirectHandle<SeqOneByteString> result) {}

// NOTE(review): presumably returns the index of the first character in `s`
// (of the given `length`) that is upper-case or non-ASCII; the value returned
// when there is none is not visible here -- confirm.
inline int FindFirstUpperOrNonAscii(Tagged<String> s, int length) {}

// Returns a UChar pointer for the flat string content `flat`.
// NOTE(review): `dest` is presumably filled with an owned copy only when the
// content cannot be used directly (e.g. one-byte content); in that case the
// returned pointer's lifetime would be tied to `*dest` -- confirm.
const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
                                    std::unique_ptr<base::uc16[]>* dest,
                                    int32_t length) {}

// Generic construction helper for an Intl object type T, given its
// constructor function plus the user-supplied `locales` and `options`.
// NOTE(review): `method_name` is presumably used for error messages -- confirm.
template <typename T>
MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
                   Handle<Object> locales, Handle<Object> options,
                   const char* method_name) {}
}  // namespace

// Returns a pointer to a byte table.
// NOTE(review): body not visible in this view; per the name, presumably the
// file-local Latin-1 lower-casing table (kToLower) -- confirm.
const uint8_t* Intl::ToLatin1LowerTable() {}

// Produces an icu::UnicodeString from the V8 string `string`, taking a start
// `offset` into it.
// NOTE(review): body not visible in this view; whether the result copies or
// aliases the V8 string's characters cannot be determined here.
icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
                                            DirectHandle<String> string,
                                            int offset) {}

namespace {

// Produces an icu::StringPiece for `string`, taking a start `offset`
// (default 0).
// NOTE(review): body not visible in this view; the lifetime of the returned
// view relative to `string` should be confirmed.
icu::StringPiece ToICUStringPiece(Isolate* isolate, DirectHandle<String> string,
                                  int offset = 0) {}

// NOTE(review): body not visible in this view. Presumably case-converts `s`
// (to upper case when `is_to_upper`, otherwise to lower case) using the ICU
// rules for language `lang` -- confirm.
MaybeHandle<String> LocaleConvertCase(Isolate* isolate, DirectHandle<String> s,
                                      bool is_to_upper, const char* lang) {}

}  // namespace

// A stripped-down version of ConvertToLower that can only handle flat one-byte
// strings and does not allocate. Note that {src} could still be, e.g., a
// one-byte sliced string with a two-byte parent string.
// Called from TF builtins.
// NOTE(review): body not visible in this view; presumably writes the result
// into {dst} and returns the resulting string -- confirm.
Tagged<String> Intl::ConvertOneByteToLower(Tagged<String> src,
                                           Tagged<String> dst) {}

// Lower-cases `s`; the MaybeHandle return type allows for failure.
// NOTE(review): body not visible in this view; presumably locale-independent
// (root locale) conversion -- confirm.
MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {}

// Upper-cases `s`; the MaybeHandle return type allows for failure.
// NOTE(review): body not visible in this view; presumably locale-independent
// (root locale) conversion -- confirm.
MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {}

// Returns the numbering system associated with `icu_locale` as a string.
// NOTE(review): body not visible in this view; the fallback value used when
// ICU cannot resolve a numbering system is not visible here.
std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {}

namespace {

// Builds an icu::Locale from the BCP 47 language tag `bcp47_locale`.
// NOTE(review): body not visible in this view; presumably yields Nothing when
// ICU rejects the tag -- confirm.
Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {}

}  // anonymous namespace

// static

// Converts an icu::UnicodeString into a V8 String.
// NOTE(review): body not visible in this view; failure conditions (empty
// MaybeHandle) are not visible here.
MaybeHandle<String> Intl::ToString(Isolate* isolate,
                                   const icu::UnicodeString& string) {}

// Range overload: converts part of `string`, delimited by `begin` and `end`,
// into a V8 String.
// NOTE(review): body not visible in this view; presumably the range is
// half-open [begin, end) -- confirm.
MaybeHandle<String> Intl::ToString(Isolate* isolate,
                                   const icu::UnicodeString& string,
                                   int32_t begin, int32_t end) {}

namespace {

// Shared helper behind the Intl::AddElement overloads.
// NOTE(review): body not visible in this view. Presumably creates a part
// object carrying `field_type_string` and `value`, stores it at
// `array[index]`, and returns it so callers can add further properties --
// confirm.
Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
                                 int index,
                                 DirectHandle<String> field_type_string,
                                 DirectHandle<String> value) {}

}  // namespace

// Adds an element described by `field_type_string` and `value` to `array` at
// position `index`.
// NOTE(review): body not visible in this view; the shape of the stored
// element (presumably a {type, value} part object) should be confirmed.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      DirectHandle<String> field_type_string,
                      DirectHandle<String> value) {}

// Overload of AddElement that additionally takes one extra property
// (`additional_property_name` / `additional_property_value`) for the element.
// NOTE(review): body not visible in this view; how the extra property is
// defined on the element (attributes, ordering) is not visible here.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      DirectHandle<String> field_type_string,
                      DirectHandle<String> value,
                      Handle<String> additional_property_name,
                      DirectHandle<String> additional_property_value) {}

namespace {

// Build the shortened locale; e.g., convert xx_Yyyy_ZZ to xx_ZZ.
//
// If `icu_locale` has a script tag, returns true and stores the locale
// without the script in `*locale_less_script`; otherwise returns false and
// stores an empty string.
bool RemoveLocaleScriptTag(const std::string& icu_locale,
                           std::string* locale_less_script) {}

// NOTE(review): body not visible in this view. Presumably checks that the ICU
// resource bundle at `path` for `locale` can be opened and, when `key` is
// given, contains that key -- confirm. Note that `locale` is taken by value.
bool ValidateResource(const icu::Locale locale, const char* path,
                      const char* key) {}

}  // namespace

// Builds a set of locale strings from `icu_available_locales`.
// NOTE(review): body not visible in this view. `path` and `validate_key` are
// presumably used to filter locales by checking an ICU resource, and
// script-less variants (see RemoveLocaleScriptTag) are presumably added too --
// confirm.
std::set<std::string> Intl::BuildLocaleSet(
    const std::vector<std::string>& icu_available_locales, const char* path,
    const char* validate_key) {}

// Converts an icu::Locale into a language-tag string.
// NOTE(review): body not visible in this view; presumably yields Nothing when
// ICU reports an error during conversion -- confirm.
Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {}

// See ecma402/#legacy-constructor.
// NOTE(review): body not visible in this view. Presumably returns `receiver`
// itself when `has_initialized_slot` is true, and otherwise looks up the
// fallback object stored on it for instances of `constructor` -- confirm.
MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
                                               Handle<JSReceiver> receiver,
                                               Handle<JSFunction> constructor,
                                               bool has_initialized_slot) {}

// Language-tag validation and canonicalization helpers.
// NOTE(review): all bodies in this namespace are elided in this view.
namespace {

// NOTE(review): presumably true iff `locale` consists of exactly two
// lower-case ASCII letters -- confirm.
bool IsTwoLetterLanguage(const std::string& locale) {}

// NOTE(review): presumably true for language codes that are deprecated or
// legacy and therefore need full canonicalization; the exact list is not
// visible here.
bool IsDeprecatedOrLegacyLanguage(const std::string& locale) {}

// NOTE(review): presumably checks `tag` for structural validity as a language
// tag -- confirm.
bool IsStructurallyValidLanguageTag(const std::string& tag) {}

// Canonicalize the locale.
// https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
// including type check and structural validity check.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           const std::string& locale_in) {}

// Overload taking an arbitrary JS value.
// NOTE(review): presumably performs the type check mentioned above, converts
// `locale_in` to a string, and delegates to the std::string overload --
// confirm.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           Handle<Object> locale_in) {}

}  // anonymous namespace

// Turns the JS value `locales` into a list of locale strings.
// NOTE(review): body not visible in this view. Presumably implements
// ecma402 CanonicalizeLocaleList; `only_return_one_result` presumably stops
// after the first canonicalized tag -- confirm.
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
    Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {}

// ecma402 #sup-string.prototype.tolocalelowercase
// ecma402 #sup-string.prototype.tolocaleuppercase
// Shared implementation for both methods: `to_upper` selects the direction
// and `locales` is the user-supplied locales argument.
// NOTE(review): body not visible in this view.
MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
                                                  Handle<String> s,
                                                  bool to_upper,
                                                  Handle<Object> locales) {}

// static
// Derives CompareStringsOptions from the `locales` and `options` arguments.
// Templated on the isolate type; it is explicitly instantiated below for both
// Isolate and LocalIsolate.
// NOTE(review): body not visible in this view; the criteria for choosing each
// option value are not visible here.
template <class IsolateT>
Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
    IsolateT* isolate, DirectHandle<Object> locales,
    DirectHandle<Object> options) {}

// Instantiations.
template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
    Isolate*, DirectHandle<Object>, DirectHandle<Object>);
template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
    LocalIsolate*, DirectHandle<Object>, DirectHandle<Object>);

// Compares `string1` and `string2` according to `locales` and `options`.
// NOTE(review): body not visible in this view. An empty optional presumably
// signals that an exception is pending on `isolate`; `method_name` is
// presumably used in error messages -- confirm.
std::optional<int> Intl::StringLocaleCompare(
    Isolate* isolate, Handle<String> string1, Handle<String> string2,
    Handle<Object> locales, Handle<Object> options, const char* method_name) {}

// NOTE(review): the contents of this anonymous namespace are not visible in
// this view; given the accessors that follow, it presumably holds the ASCII
// collation weight tables -- confirm.
namespace  // namespace

// static
// Returns a pointer to a byte table.
// NOTE(review): body not visible in this view; per the name, presumably the
// level-1 (primary) collation weights for ASCII characters -- confirm.
const uint8_t* Intl::AsciiCollationWeightsL1() {}

// static
// Returns a pointer to a byte table.
// NOTE(review): body not visible in this view; per the name, presumably the
// level-3 (tertiary) collation weights for ASCII characters -- confirm.
const uint8_t* Intl::AsciiCollationWeightsL3() {}

// static
// NOTE(review): initializer not visible in this view; presumably the number
// of entries in the ASCII collation weight tables returned above -- confirm.
const int Intl::kAsciiCollationWeightsLength =;

// ecma402/#sec-collator-comparestrings
// Compares `string1` and `string2` using `icu_collator`.
// NOTE(review): body not visible in this view. `compare_strings_options`
// presumably selects between a fast path and the general ICU comparison, and
// the int result is presumably negative/zero/positive -- confirm.
int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator,
                         Handle<String> string1, Handle<String> string2,
                         CompareStringsOptions compare_strings_options) {}

// ecma402/#sup-properties-of-the-number-prototype-object
// Formats the numeric value `num` as a string according to `locales` and
// `options`.
// NOTE(review): body not visible in this view; `method_name` is presumably
// used in error messages -- confirm.
MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
                                               Handle<Object> num,
                                               Handle<Object> locales,
                                               Handle<Object> options,
                                               const char* method_name) {}

namespace {

// Returns true iff `value` is one of the sanctioned rounding increments
// (spec step 22):
// « 1, 2, 5, 10, 20, 25, 50, 100, 200, 250, 500, 1000, 2000, 2500, 5000 ».
// Every other value, including zero and negative numbers, is rejected.
bool IsValidRoundingIncrement(int value) {
  switch (value) {
    case 1:
    case 2:
    case 5:
    case 10:
    case 20:
    case 25:
    case 50:
    case 100:
    case 200:
    case 250:
    case 500:
    case 1000:
    case 2000:
    case 2500:
    case 5000:
      return true;
    default:
      return false;
  }
}

}  // namespace

// Reads digit-related settings from `options` and returns them as a
// NumberFormatDigitOptions, or Nothing on failure.
// NOTE(review): body not visible in this view. `mnfd_default` /
// `mxfd_default` are presumably the default minimum/maximum fraction digits,
// `notation_is_compact` presumably alters the defaults for compact notation,
// and `service` presumably names the calling API for error messages --
// confirm.
Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
    Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
    int mxfd_default, bool notation_is_compact, const char* service) {}

namespace {

// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of `locale` (truncating at '-' boundaries) that
// is contained in `available_locales`. The spec's "undefined" result is
// represented by an empty string.
//
// When truncation would leave a dangling singleton subtag (e.g. the "-x" in
// "de-x-private"), the singleton is dropped as well, per step 2.c.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  std::string candidate = locale;

  // 2. Repeat,
  while (true) {
    // 2.a. If availableLocales contains candidate, return candidate.
    if (available_locales.find(candidate) != available_locales.end()) {
      return candidate;
    }

    // 2.b. Let pos be the index of the last "-" in candidate. If there is
    //      none, return undefined (here: the empty string).
    std::string::size_type pos = candidate.rfind('-');
    if (pos == std::string::npos) {
      return std::string();
    }

    // 2.c. If pos >= 2 and "-" occurs at index pos - 2, decrease pos by 2 so
    //      that a single-character subtag is removed together with its value.
    if (pos >= 2 && candidate[pos - 2] == '-') {
      pos -= 2;
    }

    // 2.d. Truncate candidate to [0, pos). pos is strictly smaller than the
    //      current length, so the loop always terminates.
    candidate.resize(pos);
  }
}

// Result type of ParseBCP47Locale.
// NOTE(review): members are not visible in this view; per the
// ParseBCP47Locale documentation it presumably holds the tag without Unicode
// extensions plus the removed extension string -- confirm.
struct ParsedLocale {};

// Returns a struct containing a BCP 47 tag without Unicode extensions
// and the removed Unicode extensions.
//
// For example, given 'en-US-u-co-emoji' returns 'en-US' and
// 'u-co-emoji'.
// NOTE(review): body not visible in this view; handling of tags without a
// Unicode extension or with private-use subtags is not visible here.
ParsedLocale ParseBCP47Locale(const std::string& locale) {}

// ecma402/#sec-lookupsupportedlocales
// Returns a list of locale strings derived from `requested_locales` and
// `available_locales`.
// NOTE(review): body not visible in this view; per the cited spec section it
// presumably keeps, in order, each requested locale for which a best
// available locale exists -- confirm.
std::vector<std::string> LookupSupportedLocales(
    const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales) {}

// Builds an icu::LocaleMatcher over `available_locales`; ICU errors are
// reported through `*status`.
// NOTE(review): body not visible in this view; which default locale and
// matcher options are configured is not visible here.
icu::LocaleMatcher BuildLocaleMatcher(
    Isolate* isolate, const std::set<std::string>& available_locales,
    UErrorCode* status) {}

// Implementation of the icu::Locale::Iterator interface.
// NOTE(review): class body not visible in this view; presumably iterates over
// a list of locale strings so they can be handed to icu::LocaleMatcher --
// confirm.
class Iterator : public icu::Locale::Iterator {};

// ecma402/#sec-bestfitmatcher
// The BestFitMatcher abstract operation compares requestedLocales, which must
// be a List as returned by CanonicalizeLocaleList, against the locales in
// availableLocales and determines the best available language to meet the
// request. The algorithm is implementation dependent, but should produce
// results that a typical user of the requested locales would perceive
// as at least as good as those produced by the LookupMatcher abstract
// operation. Options specified through Unicode locale extension sequences must
// be ignored by the algorithm. Information about such subsequences is returned
// separately. The abstract operation returns a record with a [[locale]] field,
// whose value is the language tag of the selected locale, which must be an
// element of availableLocales. If the language tag of the request locale that
// led to the selected locale contained a Unicode locale extension sequence,
// then the returned record also contains an [[extension]] field whose value is
// the first Unicode locale extension sequence within the request locale
// language tag.
//
// NOTE(review): unlike the spec record described above, this function returns
// a single std::string; the body is not visible in this view, so how the
// locale and any extension are combined in that string should be confirmed.
std::string BestFitMatcher(Isolate* isolate,
                           const std::set<std::string>& available_locales,
                           const std::vector<std::string>& requested_locales) {}

// ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
// https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
// Returns a list of locale strings derived from `requested_locales` and
// `available_locales`.
// NOTE(review): body not visible in this view; presumably the subset of
// requested locales for which the best-fit matcher finds a match -- confirm.
std::vector<std::string> BestFitSupportedLocales(
    Isolate* isolate, const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales) {}

// ecma262 #sec-createarrayfromlist
// Creates a JSArray from the strings in `elements`. Note that `elements` is
// taken by value.
// NOTE(review): body not visible in this view; `attr` is presumably the
// property attributes applied to each array element -- confirm.
MaybeHandle<JSArray> CreateArrayFromList(Isolate* isolate,
                                         std::vector<std::string> elements,
                                         PropertyAttributes attr) {}

// ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
// https://tc39.github.io/ecma402/#sec-supportedlocales
// NOTE(review): body not visible in this view. Presumably reads the
// localeMatcher setting from `options`, dispatches to the lookup or best-fit
// variant, and returns the result as a JS array; `method_name` is presumably
// used in error messages -- confirm.
MaybeHandle<JSObject> SupportedLocales(
    Isolate* isolate, const char* method_name,
    const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales, Handle<Object> options) {}

}  // namespace

// ecma-402 #sec-intl.getcanonicallocales
// Returns a JS array built from the `locales` argument.
// NOTE(review): body not visible in this view; presumably canonicalizes the
// locale list and converts it to an array -- confirm.
MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
                                               Handle<Object> locales) {}

// Helpers that produce JS arrays of supported values, one per category.
// NOTE(review): all bodies in this namespace are elided in this view; the
// source of each list (presumably ICU) and any filtering or sorting applied
// must be confirmed against the implementation.
namespace {

// Returns the available collation identifiers as a JS array.
MaybeHandle<JSArray> AvailableCollations(Isolate* isolate) {}

// Converts a vector of strings into a JS array.
// NOTE(review): whether the input is expected to be pre-sorted is not visible
// here.
MaybeHandle<JSArray> VectorToJSArray(Isolate* isolate,
                                     const std::vector<std::string>& array) {}

namespace {

// NOTE(review): class body not visible; presumably loads and holds the list
// of currency codes from ICU resources -- confirm.
class ResourceAvailableCurrencies {};

// Returns a reference to the list of available currency codes.
// NOTE(review): returning a const reference implies the list outlives the
// call (presumably a lazily created singleton) -- confirm.
const std::vector<std::string>& GetAvailableCurrencies() {}
}  // namespace

// Returns the available currency codes as a JS array.
MaybeHandle<JSArray> AvailableCurrencies(Isolate* isolate) {}

// Returns the available numbering systems as a JS array.
MaybeHandle<JSArray> AvailableNumberingSystems(Isolate* isolate) {}

// Returns the available time zone identifiers as a JS array.
MaybeHandle<JSArray> AvailableTimeZones(Isolate* isolate) {}

// Returns the available unit identifiers as a JS array.
MaybeHandle<JSArray> AvailableUnits(Isolate* isolate) {}

}  // namespace

// ecma-402 #sec-intl.supportedvaluesof
// Returns a JS array of supported values for the category named by `key_obj`.
// NOTE(review): body not visible in this view; presumably dispatches to the
// Available* helpers above and throws for unknown keys -- confirm.
MaybeHandle<JSArray> Intl::SupportedValuesOf(Isolate* isolate,
                                             Handle<Object> key_obj) {}

// ECMA 402 Intl.*.supportedLocalesOf
// Shared implementation for the supportedLocalesOf methods of the Intl
// services; `available_locales` is supplied by the calling service.
// NOTE(review): body not visible in this view; presumably canonicalizes
// `locales` and then applies SupportedLocales with `options` -- confirm.
MaybeHandle<JSObject> Intl::SupportedLocalesOf(
    Isolate* isolate, const char* method_name,
    const std::set<std::string>& available_locales, Handle<Object> locales,
    Handle<Object> options) {}

namespace {

// Checks whether `value` is acceptable for the Unicode extension `key` in
// `locale`.
// NOTE(review): body not visible in this view; T is presumably the ICU
// service class (e.g. a collator or calendar type) whose keyword values are
// consulted -- confirm.
template <typename T>
bool IsValidExtension(const icu::Locale& locale, const char* key,
                      const std::string& value) {}

}  // namespace

// Reports whether `value` is a valid collation for `locale`.
// NOTE(review): body not visible in this view; any special-casing of
// particular collation names is not visible here.
bool Intl::IsValidCollation(const icu::Locale& locale,
                            const std::string& value) {}

// Reports whether `value` is syntactically well-formed as a calendar
// identifier.
// NOTE(review): body not visible in this view; the exact grammar checked is
// not visible here.
bool Intl::IsWellFormedCalendar(const std::string& value) {}

// ecma402/#sec-iswellformedcurrencycode
// NOTE(review): body not visible in this view; per the cited spec section a
// well-formed currency code is presumably exactly three ASCII letters --
// confirm.
bool Intl::IsWellFormedCurrency(const std::string& currency) {}

// Reports whether `value` is a valid calendar for `locale`.
// NOTE(review): body not visible in this view.
bool Intl::IsValidCalendar(const icu::Locale& locale,
                           const std::string& value) {}

// Reports whether `value` names a valid numbering system.
// NOTE(review): body not visible in this view; which numbering systems (if
// any) are deliberately excluded is not visible here.
bool Intl::IsValidNumberingSystem(const std::string& value) {}

// Helpers for locale resolution.
// NOTE(review): all bodies in this namespace are elided in this view.
namespace {

// Reports whether `value` is syntactically well-formed as a numbering system
// identifier.
// NOTE(review): the exact grammar checked is not visible here.
bool IsWellFormedNumberingSystem(const std::string& value) {}

// Returns a map from Unicode extension key to value for `icu_locale`.
// NOTE(review): `icu_locale` is passed by non-const pointer, so it may be
// modified (presumably invalid or irrelevant extensions are stripped from
// it); only keys in `relevant_keys` are presumably considered -- confirm.
std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
    icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {}

// ecma402/#sec-lookupmatcher
// NOTE(review): returns a single std::string; the fallback used when no
// requested locale matches (presumably the default locale) is not visible
// here.
std::string LookupMatcher(Isolate* isolate,
                          const std::set<std::string>& available_locales,
                          const std::vector<std::string>& requested_locales) {}

}  // namespace

// This function doesn't correspond exactly with the spec. Instead
// we use ICU to do all the string manipulations that the spec
// performs.
//
// The spec uses this function to normalize values for various
// relevant extension keys (such as disallowing "search" for
// collation). Instead of doing this here, we let the callers of
// this method perform such normalization.
//
// ecma402/#sec-resolvelocale
//
// NOTE(review): body not visible in this view; `matcher` presumably selects
// between the lookup and best-fit matchers, and Nothing presumably signals a
// pending exception -- confirm.
Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
    Isolate* isolate, const std::set<std::string>& available_locales,
    const std::vector<std::string>& requested_locales, MatcherOption matcher,
    const std::set<std::string>& relevant_extension_keys) {}

// Hands `text` to `break_iterator` and returns a managed icu::UnicodeString.
// NOTE(review): body not visible in this view; the returned Managed object
// presumably owns the ICU copy of the text that the iterator refers to, so it
// must be kept alive as long as the iterator is used -- confirm.
Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
    Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {}

// ecma262 #sec-string.prototype.normalize
// Normalizes `string` according to the form given by `form_input`.
// NOTE(review): body not visible in this view; the default form used when
// `form_input` is undefined and the error thrown for invalid forms are not
// visible here.
MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
                                    Handle<Object> form_input) {}

// ICUTimezoneCache calls out to ICU for TimezoneCache
// functionality in a straightforward way.
// NOTE(review): the class body and all member function bodies are elided in
// this view; the notes below are inferred from signatures only.
class ICUTimezoneCache : public base::TimezoneCache {};

// Returns a time zone name as a C string for the instant `time_ms`.
// NOTE(review): ownership/lifetime of the returned pointer is not visible
// here (presumably owned by the cache) -- confirm.
const char* ICUTimezoneCache::LocalTimezone(double time_ms) {}

// Returns the icu::TimeZone used by this cache.
// NOTE(review): presumably created lazily and owned by the cache -- confirm.
icu::TimeZone* ICUTimezoneCache::GetTimeZone() {}

// Writes the raw and DST offsets for `time_ms` into `*raw_offset` and
// `*dst_offset`; `is_utc` states how `time_ms` is to be interpreted.
// NOTE(review): the bool result presumably reports ICU success -- confirm.
bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
                                  int32_t* raw_offset, int32_t* dst_offset) {}

// Returns the daylight-saving offset applicable at `time_ms`.
// NOTE(review): unit presumably milliseconds -- confirm.
double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {}

// Returns the local time offset applicable at `time_ms`; `is_utc` states how
// `time_ms` is to be interpreted.
// NOTE(review): unit presumably milliseconds -- confirm.
double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {}

// Resets cached state.
// NOTE(review): `time_zone_detection` presumably controls whether the host
// time zone is re-detected -- confirm.
void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {}

// Factory for a base::TimezoneCache.
// NOTE(review): body not visible in this view; presumably returns a newly
// allocated ICUTimezoneCache (or a platform cache when ICU time zones are
// disabled) that the caller owns -- confirm.
base::TimezoneCache* Intl::CreateTimeZoneCache() {}

// Reads the locale matcher choice from `options`.
// NOTE(review): body not visible in this view; presumably reads the
// "localeMatcher" option with a best-fit default, and uses `method_name` in
// error messages -- confirm.
Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
                                                  Handle<JSReceiver> options,
                                                  const char* method_name) {}

// Reads a numbering system from `options` into the out-parameter `*result`.
// NOTE(review): body not visible in this view; presumably Just(true) means a
// value was found and stored, Just(false) means the option was absent, and
// Nothing means an exception is pending -- confirm.
Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
                                     Handle<JSReceiver> options,
                                     const char* method_name,
                                     std::unique_ptr<char[]>* result) {}

// Returns a reference to the set of available locales.
// NOTE(review): body not visible in this view; returning a const reference
// implies the set outlives the call (presumably a lazily created singleton)
// -- confirm.
const std::set<std::string>& Intl::GetAvailableLocales() {}

namespace {

// NOTE(review): struct body not visible in this view; presumably a policy
// type used to validate that a locale has calendar data when building the
// date-format locale set -- confirm.
struct CheckCalendar {};

}  // namespace

// Returns a reference to the set of locales available for date formatting.
// NOTE(review): body not visible in this view; presumably a lazily created
// singleton filtered via CheckCalendar -- confirm.
const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {}

// NOTE(review): initializer not visible in this view; presumably the UTF-16
// code unit of the infinity sign (U+221E) -- confirm.
constexpr uint16_t kInfinityChar =;

// Maps the formatted-number span `part` to a type name string.
// NOTE(review): body not visible in this view; `text` and `is_nan` are
// presumably used to distinguish "nan"/"infinity" from ordinary integer
// parts -- confirm.
Handle<String> Intl::NumberFieldToType(Isolate* isolate,
                                       const NumberFormatSpan& part,
                                       const icu::UnicodeString& text,
                                       bool is_nan) {}

// A helper function to convert the FormattedValue for several Intl objects.
// Returns the formatted value as a V8 String.
// NOTE(review): body not visible in this view; presumably throws and returns
// an empty handle when ICU reports an error -- confirm.
MaybeHandle<String> Intl::FormattedToString(
    Isolate* isolate, const icu::FormattedValue& formatted) {}

// Converts the strings produced by `enumeration` into a JS array.
// NOTE(review): body not visible in this view. Presumably `removes` is a
// predicate selecting items to drop, `unicode_key` (when non-null) is used to
// map legacy ICU values to their Unicode-extension form, and `sort` requests
// sorted output -- confirm. Ownership of `enumeration` is not visible here.
MaybeHandle<JSArray> Intl::ToJSArray(
    Isolate* isolate, const char* unicode_key,
    icu::StringEnumeration* enumeration,
    const std::function<bool(const char*)>& removes, bool sort) {}

// Predicate over collation names.
// NOTE(review): body not visible in this view; presumably returns true for
// collations that must be excluded from results (e.g. "standard"/"search"),
// matching the `removes` parameter of ToJSArray -- confirm.
bool Intl::RemoveCollation(const char* collation) {}

// See the list in ecma402 #sec-issanctionedsimpleunitidentifier
// Returns the set of sanctioned simple unit identifiers by value.
// NOTE(review): body not visible in this view; the set is returned by value,
// so each call presumably constructs a fresh copy -- confirm.
std::set<std::string> Intl::SanctionedSimpleUnits() {}

// ecma-402/#sec-isvalidtimezonename

namespace {
// Reports whether `id` is a valid time zone name.
// NOTE(review): body not visible in this view; presumably asks ICU to
// canonicalize `id` and checks that the result is a recognized zone --
// confirm.
bool IsUnicodeStringValidTimeZoneName(const icu::UnicodeString& id) {}
}  // namespace

// Returns the canonical form of the time zone name `identifier`.
// NOTE(review): body not visible in this view; behavior for invalid
// identifiers is not visible here.
MaybeHandle<String> Intl::CanonicalizeTimeZoneName(
    Isolate* isolate, DirectHandle<String> identifier) {}

// Reports whether the V8 string `id` is a valid time zone name.
// NOTE(review): body not visible in this view; presumably converts `id` to an
// ICU string and defers to IsUnicodeStringValidTimeZoneName -- confirm.
bool Intl::IsValidTimeZoneName(Isolate* isolate, DirectHandle<String> id) {}

// Overload for an icu::TimeZone.
// NOTE(review): body not visible in this view; presumably validates the
// zone's ID -- confirm.
bool Intl::IsValidTimeZoneName(const icu::TimeZone& tz) {}

// Function to support Temporal
// Maps a time zone index to its identifier string.
// NOTE(review): body not visible in this view; the index space is presumably
// the one produced by GetTimeZoneIndex, and handling of out-of-range indices
// is not visible here.
std::string Intl::TimeZoneIdFromIndex(int32_t index) {}

// Maps the time zone name `identifier` to an integer index.
// NOTE(review): body not visible in this view; the value returned for unknown
// identifiers (presumably a negative sentinel) should be confirmed.
int32_t Intl::GetTimeZoneIndex(Isolate* isolate,
                               DirectHandle<String> identifier) {}

// Member definitions of Intl::FormatRangeSourceTracker.
// NOTE(review): all bodies are elided in this view; the class presumably
// records the text spans covered by each range field so that a formatted part
// can be attributed to a source -- confirm.
Intl::FormatRangeSourceTracker::FormatRangeSourceTracker() {}

// Records the span delimited by `start` and `limit` for `field`.
// NOTE(review): the span is presumably half-open [start, limit) -- confirm.
void Intl::FormatRangeSourceTracker::Add(int32_t field, int32_t start,
                                         int32_t limit) {}

// Returns the FormatRangeSource for the span delimited by `start` and
// `limit`.
Intl::FormatRangeSource Intl::FormatRangeSourceTracker::GetSource(
    int32_t start, int32_t limit) const {}

// Reports whether the span recorded for `field` contains the span delimited
// by `start` and `limit`.
bool Intl::FormatRangeSourceTracker::FieldContains(int32_t field, int32_t start,
                                                   int32_t limit) const {}

// Maps a FormatRangeSource value to a V8 String.
// NOTE(review): body not visible in this view; the concrete strings returned
// for each enumerator are not visible here.
Handle<String> Intl::SourceString(Isolate* isolate, FormatRangeSource source) {}

// Returns the default time zone as a V8 String.
// NOTE(review): body not visible in this view; presumably the canonicalized
// ID of ICU's default time zone -- confirm.
Handle<String> Intl::DefaultTimeZone(Isolate* isolate) {}

// Time zone helpers used by the Temporal support functions below.
// NOTE(review): all bodies in this namespace are elided in this view.
namespace {

// Creates an icu::BasicTimeZone for `time_zone_index`.
// NOTE(review): ownership of the returned pointer is not visible here
// (the "Create" name suggests the caller owns it) -- confirm.
const icu::BasicTimeZone* CreateBasicTimeZoneFromIndex(
    int32_t time_zone_index) {}

// ICU only supports time zone information at millisecond precision, but
// Temporal requires nanoseconds. In most cases we approximate by flooring to
// the millisecond in the past of nanosecond_epoch. For negative epoch values,
// BigInt division rounds toward zero, so we need to subtract 1 if the
// remainder is not zero. When finding the previous transition, we instead
// need to ceil to the millisecond in the near future of nanosecond_epoch.
// NOTE(review): the enumerators of Direction are not visible in this view;
// only Direction::kPast is referenced (as the default argument below).
enum class Direction {};
int64_t ApproximateMillisecondEpoch(Isolate* isolate,
                                    Handle<BigInt> nanosecond_epoch,
                                    Direction direction = Direction::kPast) {}

// Helper function to convert the milliseconds in int64_t
// to a BigInt in nanoseconds.
Handle<BigInt> MillisecondToNanosecond(Isolate* isolate, int64_t ms) {}

}  // namespace

// Looks up an offset transition of the time zone `time_zone_index` relative
// to `nanosecond_epoch`; `transition` selects which transition is wanted.
// NOTE(review): body not visible in this view. The Handle<Object> return type
// suggests the result is either a BigInt (nanoseconds) or a non-BigInt
// sentinel such as null when no transition exists -- confirm.
Handle<Object> Intl::GetTimeZoneOffsetTransitionNanoseconds(
    Isolate* isolate, int32_t time_zone_index, Handle<BigInt> nanosecond_epoch,
    Intl::Transition transition) {}

// Returns a list of BigInt values for the time zone `time_zone_index` at
// `nanosecond_epoch`.
// NOTE(review): body not visible in this view; per the name, presumably the
// possible UTC offsets in nanoseconds (zero, one or two entries around DST
// transitions) -- confirm.
std::vector<Handle<BigInt>> Intl::GetTimeZonePossibleOffsetNanoseconds(
    Isolate* isolate, int32_t time_zone_index,
    Handle<BigInt> nanosecond_epoch) {}

// Returns an offset for the time zone `time_zone_index` at
// `nanosecond_epoch`.
// NOTE(review): body not visible in this view; per the name, the result is
// presumably the UTC offset in nanoseconds (raw + DST) -- confirm.
int64_t Intl::GetTimeZoneOffsetNanoseconds(Isolate* isolate,
                                           int32_t time_zone_index,
                                           Handle<BigInt> nanosecond_epoch) {}

}  // namespace v8::internal