chromium/ppapi/shared_impl/private/ppb_char_set_shared.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "ppapi/shared_impl/private/ppb_char_set_shared.h"

#include <string.h>

#include <algorithm>

#include "base/i18n/icu_string_conversions.h"
#include "ppapi/c/dev/ppb_memory_dev.h"
#include "ppapi/thunk/thunk.h"
#include "third_party/icu/source/common/unicode/ucnv.h"
#include "third_party/icu/source/common/unicode/ucnv_cb.h"
#include "third_party/icu/source/common/unicode/ucnv_err.h"
#include "third_party/icu/source/common/unicode/ustring.h"

namespace ppapi {

namespace {

// Translates a value of the deprecated PP_CharSet_ConversionError enum into
// the corresponding PP_CharSet_Trusted_ConversionError value. SKIP and
// SUBSTITUTE map to their trusted counterparts; every other value (FAIL and
// anything unrecognized) maps to FAIL.
PP_CharSet_Trusted_ConversionError DeprecatedToConversionError(
    PP_CharSet_ConversionError on_error) {
  if (on_error == PP_CHARSET_CONVERSIONERROR_SKIP)
    return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP;
  if (on_error == PP_CHARSET_CONVERSIONERROR_SUBSTITUTE)
    return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE;
  return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL;
}

// Maps the PPAPI error-handling mode |on_error| onto the matching
// base::OnStringConversionError value. On success the mapped value is stored
// in |*result| and true is returned. For an enum value that is not one of
// FAIL/SKIP/SUBSTITUTE, false is returned and |*result| is left untouched.
bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error,
                             base::OnStringConversionError::Type* result) {
  if (on_error == PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL) {
    *result = base::OnStringConversionError::FAIL;
  } else if (on_error == PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP) {
    *result = base::OnStringConversionError::SKIP;
  } else if (on_error == PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE) {
    *result = base::OnStringConversionError::SUBSTITUTE;
  } else {
    return false;  // Not a recognized enum value.
  }
  return true;
}

}  // namespace

// static
// The "substitution" behavior of this function does not match the
// implementation in base, so we partially duplicate the code from
// icu_string_conversions.cc with the correct error handling setup required
// by the PPAPI interface.
char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated(
    const uint16_t* utf16,
    uint32_t utf16_len,
    const char* output_char_set,
    PP_CharSet_ConversionError deprecated_on_error,
    uint32_t* output_length) {
  *output_length = 0;
  PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
      deprecated_on_error);

  // Compute required length.
  uint32_t required_length = 0;
  UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL,
                 &required_length);

  // Our output is null terminated, so need one more byte.
  char* ret_buf = static_cast<char*>(
      thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1));

  // Do the conversion into the buffer.
  PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error,
                                  ret_buf, &required_length);
  if (result == PP_FALSE) {
    thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
    return NULL;
  }
  ret_buf[required_length] = 0;  // Null terminate.
  *output_length = required_length;
  return ret_buf;
}

// static
// Converts |utf16_len| UTF-16 code units from |utf16| into the character set
// named |output_char_set| using ICU.
//
// |output_buffer| may be null, in which case nothing is written and only the
// required size is computed. Otherwise, on entry |*output_length| is the
// capacity of |output_buffer| in bytes. On return |*output_length| holds the
// size reported by ICU (not including any terminating null), or 0 on failure.
//
// |on_error| selects how unconvertible input is treated (fail, skip, or
// substitute). Returns PP_FALSE for null required arguments, an unknown
// character set, an invalid |on_error|, an input length ICU cannot represent,
// or a conversion error. A too-small output buffer is NOT treated as failure:
// PP_TRUE is returned with |*output_length| set to the needed size.
PP_Bool PPB_CharSet_Shared::UTF16ToCharSet(
    const uint16_t utf16[],
    uint32_t utf16_len,
    const char* output_char_set,
    PP_CharSet_Trusted_ConversionError on_error,
    char* output_buffer,
    uint32_t* output_length) {
  // Without a place to report the length there is nothing we can write to.
  if (!output_length)
    return PP_FALSE;

  if (!utf16 || !output_char_set) {
    *output_length = 0;
    return PP_FALSE;
  }

  // ICU takes the source length as a signed 32-bit value and interprets -1 as
  // "NUL-terminated". Reject lengths that would turn negative so that a huge
  // |utf16_len| can never make ICU scan past the caller's buffer.
  constexpr uint32_t kMaxIcuLength = 0x7FFFFFFF;
  if (utf16_len > kMaxIcuLength) {
    *output_length = 0;
    return PP_FALSE;
  }

  UErrorCode status = U_ZERO_ERROR;
  UConverter* converter = ucnv_open(output_char_set, &status);
  if (!U_SUCCESS(status)) {
    *output_length = 0;
    return PP_FALSE;
  }

  // Setup our error handler.
  switch (on_error) {
    case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL:
      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
                            nullptr, nullptr, &status);
      break;
    case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP:
      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
                            nullptr, nullptr, &status);
      break;
    case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE: {
      // ICU sets the substitution char for some character sets (like latin1)
      // to be the ASCII "substitution character" (26). We want to use '?'
      // instead for backwards-compat with Windows behavior.
      char subst_chars[32];
      int8_t subst_chars_len = 32;
      ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status);
      if (subst_chars_len == 1 && subst_chars[0] == 26) {
        // Override to the question mark character if possible. When using
        // setSubstString, the input is a Unicode character. The function will
        // try to convert it to the destination character set and fail if that
        // can not be converted to the destination character set.
        //
        // We just ignore any failure. If the dest char set has no
        // representation for '?', then we'll just stick to the ICU default
        // substitution character.
        UErrorCode subst_status = U_ZERO_ERROR;
        UChar question_mark = '?';
        ucnv_setSubstString(converter, &question_mark, 1, &subst_status);
      }

      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
                            nullptr, nullptr, &status);
      break;
    }
    default:
      // Invalid enum value.
      *output_length = 0;
      ucnv_close(converter);
      return PP_FALSE;
  }

  // ucnv_fromUChars returns required size not including terminating null.
  // With no output buffer the capacity passed is 0, so this only measures.
  *output_length = static_cast<uint32_t>(ucnv_fromUChars(
      converter, output_buffer, output_buffer ? *output_length : 0,
      reinterpret_cast<const UChar*>(utf16), utf16_len, &status));

  ucnv_close(converter);
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // Don't treat this as a fatal error since we need to return the string
    // size.
    return PP_TRUE;
  }
  if (!U_SUCCESS(status)) {
    *output_length = 0;
    return PP_FALSE;
  }
  return PP_TRUE;
}

// static
uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated(
    const char* input,
    uint32_t input_len,
    const char* input_char_set,
    PP_CharSet_ConversionError deprecated_on_error,
    uint32_t* output_length) {
  *output_length = 0;
  PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
      deprecated_on_error);

  // Compute required length.
  uint32_t required_length = 0;
  CharSetToUTF16(input, input_len, input_char_set, on_error, NULL,
                 &required_length);

  // Our output is null terminated, so need one more byte.
  uint16_t* ret_buf = static_cast<uint16_t*>(
      thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(
          (required_length + 1) * sizeof(uint16_t)));

  // Do the conversion into the buffer.
  PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error,
                                  ret_buf, &required_length);
  if (result == PP_FALSE) {
    thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
    return NULL;
  }
  ret_buf[required_length] = 0;  // Null terminate.
  *output_length = required_length;
  return ret_buf;
}

// Converts |input_len| bytes of |input|, encoded in |input_char_set|, to
// UTF-16 via base::CodepageToUTF16.
//
// |output_buffer| may be null, in which case nothing is copied and only the
// length is reported. Otherwise, on entry |*output_utf16_length| is the
// capacity of |output_buffer| in UTF-16 code units, and at most that many
// code units are copied (no terminator is written). On return
// |*output_utf16_length| holds the full length of the converted string in
// code units, even if the copy was truncated, or 0 on failure.
//
// Returns PP_FALSE for null required arguments, an invalid |on_error| value,
// or a failed conversion; PP_TRUE otherwise.
PP_Bool PPB_CharSet_Shared::CharSetToUTF16(
    const char* input,
    uint32_t input_len,
    const char* input_char_set,
    PP_CharSet_Trusted_ConversionError on_error,
    uint16_t* output_buffer,
    uint32_t* output_utf16_length) {
  // Without a place to report the length there is nothing we can write to.
  if (!output_utf16_length)
    return PP_FALSE;

  if (!input || !input_char_set) {
    *output_utf16_length = 0;
    return PP_FALSE;
  }

  base::OnStringConversionError::Type base_on_error;
  if (!PPToBaseConversionError(on_error, &base_on_error)) {
    *output_utf16_length = 0;
    return PP_FALSE;  // Invalid enum value.
  }

  // We can convert this call to the implementation in base to avoid code
  // duplication, although this does introduce an extra copy of the data.
  std::u16string output;
  if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set,
                             base_on_error, &output)) {
    *output_utf16_length = 0;
    return PP_FALSE;
  }

  if (output_buffer) {
    // Copy no more than the caller-supplied capacity.
    memcpy(output_buffer, output.c_str(),
           std::min(*output_utf16_length, static_cast<uint32_t>(output.size()))
           * sizeof(uint16_t));
  }
  *output_utf16_length = static_cast<uint32_t>(output.size());
  return PP_TRUE;
}

}  // namespace ppapi