chromium/net/android/java/src/org/chromium/net/NetStringUtil.java

// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.net;

import org.jni_zero.CalledByNative;
import org.jni_zero.JNINamespace;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.Normalizer;
import java.util.Locale;

/**
 * String conversion helpers that native code calls into when the build does
 * not include icu.
 */
@JNINamespace("net::android")
public class NetStringUtil {
    /**
     * Decodes bytes in the named character set into a Unicode string.
     * Decoding is strict: malformed or unmappable input counts as a failure.
     *
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return Unicode string on success, null on failure.
     */
    @CalledByNative
    private static String convertToUnicode(ByteBuffer text, String charsetName) {
        try {
            // A decoder left at its default settings throws on invalid input,
            // which is turned into a null result by the catch below.
            return Charset.forName(charsetName).newDecoder().decode(text).toString();
        } catch (Exception ignored) {
            return null;
        }
    }

    /**
     * Decodes bytes in the named character set into a Unicode string and then
     * normalizes the result to NFC.
     *
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return Normalized Unicode string on success, null if the conversion
     *         failed.
     */
    @CalledByNative
    private static String convertToUnicodeAndNormalize(ByteBuffer text, String charsetName) {
        String decoded = convertToUnicode(text, charsetName);
        return decoded == null ? null : Normalizer.normalize(decoded, Normalizer.Form.NFC);
    }

    /**
     * Decodes bytes in the named character set into a Unicode string,
     * substituting U+FFFD for any invalid characters instead of failing.
     *
     * @param text ByteBuffer holding the encoded bytes to convert.
     * @param charsetName Name of the character set the bytes are encoded in.
     * @return Unicode string on success, null on failure (for example when the
     *         character set is not recognized).
     */
    @CalledByNative
    private static String convertToUnicodeWithSubstitutions(ByteBuffer text, String charsetName) {
        try {
            // TODO(mmenke):  Investigate if Charset.decode() can be used
            // instead.  The question is whether it uses the proper replace
            // character.  JDK CharsetDecoder docs say U+FFFD is the default,
            // but Charset.decode() docs say it uses the "charset's default
            // replacement byte array".
            CharsetDecoder substitutingDecoder =
                    Charset.forName(charsetName)
                            .newDecoder()
                            .onMalformedInput(CodingErrorAction.REPLACE)
                            .onUnmappableCharacter(CodingErrorAction.REPLACE)
                            .replaceWith("\uFFFD");
            return substitutingDecoder.decode(text).toString();
        } catch (Exception ignored) {
            return null;
        }
    }

    /**
     * Converts a string to uppercase using the default locale.
     *
     * @param str String to convert.
     * @return The uppercased string, or null on failure.
     */
    @CalledByNative
    private static String toUpperCase(String str) {
        try {
            Locale defaultLocale = Locale.getDefault();
            return str.toUpperCase(defaultLocale);
        } catch (Exception ignored) {
            return null;
        }
    }
}