chromium/v8/src/strings/unicode.h

// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_STRINGS_UNICODE_H_
#define V8_STRINGS_UNICODE_H_

#include <sys/types.h>

#include "src/base/bit-field.h"
#include "src/base/vector.h"
#include "src/common/globals.h"
#include "src/third_party/utf8-decoder/utf8-decoder.h"
/**
 * \file
 * Definitions and convenience functions for working with unicode.
 */

namespace unibrow {

uchar;

/**
 * The max length of the result of converting the case of a single
 * character.
 */
const int kMaxMappingSize =;

#ifndef V8_INTL_SUPPORT
template <class T, int size = 256>
class Predicate {
 public:
  inline Predicate() = default;
  inline bool get(uchar c);

 private:
  friend class Test;
  bool CalculateValue(uchar c);
  class CacheEntry {
   public:
    inline CacheEntry()
        : bit_field_(CodePointField::encode(0) | ValueField::encode(0)) {}
    inline CacheEntry(uchar code_point, bool value)
        : bit_field_(
              CodePointField::encode(CodePointField::kMask & code_point) |
              ValueField::encode(value)) {
      DCHECK_IMPLIES((CodePointField::kMask & code_point) != code_point,
                     code_point == static_cast<uchar>(-1));
    }

    uchar code_point() const { return CodePointField::decode(bit_field_); }
    bool value() const { return ValueField::decode(bit_field_); }

   private:
    using CodePointField = v8::base::BitField<uchar, 0, 21>;
    using ValueField = v8::base::BitField<bool, 21, 1>;

    uint32_t bit_field_;
  };
  static const int kSize = size;
  static const int kMask = kSize - 1;
  CacheEntry entries_[kSize];
};

// A cache used in case conversion.  It caches the value for characters
// that either have no mapping or map to a single character independent
// of context.  Characters that map to more than one character or that
// map differently depending on context are always looked up.
template <class T, int size = 256>
class Mapping {
 public:
  inline Mapping() = default;
  inline int get(uchar c, uchar n, uchar* result);

 private:
  friend class Test;
  int CalculateValue(uchar c, uchar n, uchar* result);
  struct CacheEntry {
    inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
    inline CacheEntry(uchar code_point, signed offset)
        : code_point_(code_point), offset_(offset) {}
    uchar code_point_;
    signed offset_;
    static const int kNoChar = (1 << 21) - 1;
  };
  static const int kSize = size;
  static const int kMask = kSize - 1;
  CacheEntry entries_[kSize];
};

class UnicodeData {
 private:
  friend class Test;
  static int GetByteCount();
  static const uchar kMaxCodePoint;
};

#endif  // !V8_INTL_SUPPORT

class Utf16 {};

class Latin1 {};

enum class Utf8Variant : uint8_t {};

class V8_EXPORT_PRIVATE Utf8 {};

#if V8_ENABLE_WEBASSEMBLY
class V8_EXPORT_PRIVATE Wtf8 {};
#endif  // V8_ENABLE_WEBASSEMBLY

struct Uppercase {};
struct Letter {};
#ifndef V8_INTL_SUPPORT
struct V8_EXPORT_PRIVATE ID_Start {
  static bool Is(uchar c);
};
struct V8_EXPORT_PRIVATE ID_Continue {
  static bool Is(uchar c);
};
struct V8_EXPORT_PRIVATE WhiteSpace {
  static bool Is(uchar c);
};
#endif  // !V8_INTL_SUPPORT

// LineTerminator:       'JS_Line_Terminator' in point.properties
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
V8_INLINE bool IsLineTerminator(uchar c) {}

V8_INLINE bool IsStringLiteralLineTerminator(uchar c) {}

#ifndef V8_INTL_SUPPORT
struct V8_EXPORT_PRIVATE ToLowercase {
  static const int kMaxWidth = 3;
  static const bool kIsToLower = true;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct V8_EXPORT_PRIVATE ToUppercase {
  static const int kMaxWidth = 3;
  static const bool kIsToLower = false;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct V8_EXPORT_PRIVATE Ecma262Canonicalize {
  static const int kMaxWidth = 1;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct V8_EXPORT_PRIVATE Ecma262UnCanonicalize {
  static const int kMaxWidth = 4;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct V8_EXPORT_PRIVATE CanonicalizationRange {
  static const int kMaxWidth = 1;
  static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
#endif  // !V8_INTL_SUPPORT

}  // namespace unibrow

#endif  // V8_STRINGS_UNICODE_H_