# HG changeset patch # User redestad # Date 1497385408 -7200 # Tue Jun 13 22:23:28 2017 +0200 # Node ID a0a6ed10f26e70c86900c9d5dbeb0f9ba0569a4d # Parent 875fa66a13a242ec947ba309c99e2a88991dbf4f 8181147: JNI_GetStringPlatformChars should have a fast path for UTF-8 Reviewed-by: martin, erikj, chegar diff --git a/make/test/JtregNative.gmk b/make/test/JtregNative.gmk --- a/make/test/JtregNative.gmk +++ b/make/test/JtregNative.gmk @@ -44,12 +44,22 @@ # Add more directories here when needed. BUILD_JDK_JTREG_NATIVE_SRC += \ $(JDK_TOPDIR)/test/native_sanity \ + $(JDK_TOPDIR)/test/java/lang/String/nativeEncoding \ # BUILD_JDK_JTREG_OUTPUT_DIR := $(BUILD_OUTPUT)/support/test/jdk/jtreg/native BUILD_JDK_JTREG_IMAGE_DIR := $(TEST_IMAGE_DIR)/jdk/jtreg +ifeq ($(OPENJDK_TARGET_OS), windows) + WIN_LIB_JAVA := $(SUPPORT_OUTPUTDIR)/native/java.base/libjava/java.lib + BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := $(WIN_LIB_JAVA) +else ifeq ($(OPENJDK_TARGET_OS), solaris) + BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava -lc +else + BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava +endif + $(eval $(call SetupTestFilesCompilation, BUILD_JDK_JTREG_LIBRARIES, \ TYPE := LIBRARY, \ SOURCE_DIRS := $(BUILD_JDK_JTREG_NATIVE_SRC), \ diff --git a/src/java.base/share/native/libjava/jni_util.c b/src/java.base/share/native/libjava/jni_util.c --- a/src/java.base/share/native/libjava/jni_util.c +++ b/src/java.base/share/native/libjava/jni_util.c @@ -442,11 +442,10 @@ return obj; } -/* Optimized for char set ISO_8559_1 */ +/* Optimized for charset ISO_8559_1 */ static jstring -newString8859_1(JNIEnv *env, const char *str) +newSizedString8859_1(JNIEnv *env, const char *str, const int len) { - int len = (int)strlen(str); jchar buf[512]; jchar *str1; jstring result; @@ -469,6 +468,16 @@ return result; } +static jstring +newString8859_1(JNIEnv *env, const char *str) +{ + int len = (int)strlen(str); + return newSizedString8859_1(env, str, len); +} + +static const 
jbyte LATIN1 = 0; +static const jbyte UTF16 = 1; + static const char* getString8859_1Chars(JNIEnv *env, jstring jstr) { @@ -501,7 +510,7 @@ } -/* Optimized for char set ISO646-US (us-ascii) */ +/* Optimized for charset ISO646-US (us-ascii) */ static jstring newString646_US(JNIEnv *env, const char *str) { @@ -573,7 +582,7 @@ 0x02Dc,0x2122,0x0161,0x203A,0x0153,0xFFFD,0x017E,0x0178 }; -/* Optimized for char set Cp1252 */ +/* Optimized for charset Cp1252 */ static jstring newStringCp1252(JNIEnv *env, const char *str) { @@ -671,8 +680,89 @@ static jmethodID String_init_ID; /* String(byte[], enc) */ static jmethodID String_getBytes_ID; /* String.getBytes(enc) */ -int getFastEncoding() { - return fastEncoding; +/* Cached field IDs */ +static jfieldID String_coder_ID; /* String.coder */ +static jfieldID String_value_ID; /* String.value */ + +static jboolean isJNUEncodingSupported = JNI_FALSE; +static jboolean jnuEncodingSupported(JNIEnv *env) { + jboolean exe; + if (isJNUEncodingSupported == JNI_TRUE) { + return JNI_TRUE; + } + isJNUEncodingSupported = (jboolean) JNU_CallStaticMethodByName ( + env, &exe, + "java/nio/charset/Charset", + "isSupported", + "(Ljava/lang/String;)Z", + jnuEncoding).z; + return isJNUEncodingSupported; +} + +/* Create a new string by converting str to a heap-allocated byte array and + * calling the appropriate String constructor. 
+ */
+static jstring
+newSizedStringJava(JNIEnv *env, const char *str, const int len)
+{
+    jstring result = NULL;
+    jbyteArray bytes = 0;
+
+    if ((*env)->EnsureLocalCapacity(env, 2) < 0)
+        return NULL;
+
+    bytes = (*env)->NewByteArray(env, len);
+    if (bytes != NULL) {
+        jclass strClazz = JNU_ClassString(env);
+        CHECK_NULL_RETURN(strClazz, 0);
+        (*env)->SetByteArrayRegion(env, bytes, 0, len, (jbyte *)str);
+        if (jnuEncodingSupported(env)) {
+            result = (*env)->NewObject(env, strClazz,
+                                       String_init_ID, bytes, jnuEncoding);
+        } else {
+            /* If the encoding named by sun.jnu.encoding is not accepted by
+               Charset.isSupported, fall back to the String(byte[])
+               constructor, looked up as "<init>" with signature ([B)V.
+               No encoding name is passed, so the StringCoding class picks
+               iso-8859-1 as the fallback converter for us.
+             */
+            jmethodID mid = (*env)->GetMethodID(env, strClazz,
+                                                "<init>", "([B)V");
+            if (mid != NULL) {
+                result = (*env)->NewObject(env, strClazz, mid, bytes);
+            }
+        }
+        (*env)->DeleteLocalRef(env, bytes);
+        return result;
+    }
+    return NULL;
+}
+
+static jstring
+newStringJava(JNIEnv *env, const char *str)
+{
+    int len = (int)strlen(str);
+    return newSizedStringJava(env, str, len);
+}
+
+/* Optimized for charset UTF-8 */
+static jstring
+newStringUTF8(JNIEnv *env, const char *str)
+{
+    int len;
+    const unsigned char *p;
+    unsigned char asciiCheck;
+    for (asciiCheck = 0, p = (const unsigned char*)str; *p != '\0'; p++) {
+        asciiCheck |= *p;
+    }
+    len = (int)((const char*)p - str);
+
+    if (asciiCheck < 0x80) {
+        // ascii fast-path: no byte has its high bit set
+        return newSizedString8859_1(env, str, len);
+    }
+
+    return newSizedStringJava(env, str, len);
 }
 
 /* Initialize the fast encoding.
If the "sun.jnu.encoding" property
@@ -718,17 +808,20 @@
             if ((strcmp(encname, "8859_1") == 0) ||
                 (strcmp(encname, "ISO8859-1") == 0) ||
                 (strcmp(encname, "ISO8859_1") == 0) ||
-                (strcmp(encname, "ISO-8859-1") == 0))
+                (strcmp(encname, "ISO-8859-1") == 0)) {
                 fastEncoding = FAST_8859_1;
-            else if (strcmp(encname, "ISO646-US") == 0)
+            } else if (strcmp(encname, "UTF-8") == 0) {
+                fastEncoding = FAST_UTF_8;
+                jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
+            } else if (strcmp(encname, "ISO646-US") == 0) {
                 fastEncoding = FAST_646_US;
-            else if (strcmp(encname, "Cp1252") == 0 ||
+            } else if (strcmp(encname, "Cp1252") == 0 ||
                 /* This is a temporary fix until we move */
                 /* to wide character versions of all Windows */
                 /* calls. */
-                strcmp(encname, "utf-16le") == 0)
+                strcmp(encname, "utf-16le") == 0) {
                 fastEncoding = FAST_CP1252;
-            else {
+            } else {
                 fastEncoding = NO_FAST_ENCODING;
                 jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
             }
@@ -750,24 +843,10 @@
     CHECK_NULL(String_getBytes_ID);
     String_init_ID = (*env)->GetMethodID(env, strClazz,
                                          "<init>", "([BLjava/lang/String;)V");
+    String_coder_ID = (*env)->GetFieldID(env, strClazz, "coder", "B");
+    String_value_ID = (*env)->GetFieldID(env, strClazz, "value", "[B");
 }
 
-static jboolean isJNUEncodingSupported = JNI_FALSE;
-static jboolean jnuEncodingSupported(JNIEnv *env) {
-    jboolean exe;
-    if (isJNUEncodingSupported == JNI_TRUE) {
-        return JNI_TRUE;
-    }
-    isJNUEncodingSupported = (jboolean) JNU_CallStaticMethodByName (
-                                    env, &exe,
-                                    "java/nio/charset/Charset",
-                                    "isSupported",
-                                    "(Ljava/lang/String;)Z",
-                                    jnuEncoding).z;
-    return isJNUEncodingSupported;
-}
-
-
 JNIEXPORT jstring
 NewStringPlatform(JNIEnv *env, const char *str)
 {
@@ -777,10 +856,6 @@
 JNIEXPORT jstring JNICALL
 JNU_NewStringPlatform(JNIEnv *env, const char *str)
 {
-    jstring result = NULL;
-    jbyteArray hab = 0;
-    int len;
-
     if (fastEncoding == NO_ENCODING_YET) {
         initializeEncoding(env);
         JNU_CHECK_EXCEPTION_RETURN(env, NULL);
@@ -792,36 +867,9 @@
         return
newString646_US(env, str);
     if (fastEncoding == FAST_CP1252)
         return newStringCp1252(env, str);
-
-    if ((*env)->EnsureLocalCapacity(env, 2) < 0)
-        return NULL;
-
-    len = (int)strlen(str);
-    hab = (*env)->NewByteArray(env, len);
-    if (hab != 0) {
-        jclass strClazz = JNU_ClassString(env);
-        CHECK_NULL_RETURN(strClazz, 0);
-        (*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);
-        if (jnuEncodingSupported(env)) {
-            result = (*env)->NewObject(env, strClazz,
-                                       String_init_ID, hab, jnuEncoding);
-        } else {
-            /*If the encoding specified in sun.jnu.encoding is not endorsed
-              by "Charset.isSupported" we have to fall back to use String(byte[])
-              explicitly here without specifying the encoding name, in which the
-              StringCoding class will pickup the iso-8859-1 as the fallback
-              converter for us.
-            */
-            jmethodID mid = (*env)->GetMethodID(env, strClazz,
-                                                "<init>", "([B)V");
-            if (mid != NULL) {
-                result = (*env)->NewObject(env, strClazz, mid, hab);
-            }
-        }
-        (*env)->DeleteLocalRef(env, hab);
-        return result;
-    }
-    return NULL;
+    if (fastEncoding == FAST_UTF_8)
+        return newStringUTF8(env, str);
+    return newStringJava(env, str);
 }
 
 JNIEXPORT const char *
@@ -830,27 +878,10 @@
     return JNU_GetStringPlatformChars(env, jstr, isCopy);
 }
 
-JNIEXPORT const char * JNICALL
-JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
-{
+static const char* getStringBytes(JNIEnv *env, jstring jstr) {
     char *result = NULL;
     jbyteArray hab = 0;
 
-    if (isCopy)
-        *isCopy = JNI_TRUE;
-
-    if (fastEncoding == NO_ENCODING_YET) {
-        initializeEncoding(env);
-        JNU_CHECK_EXCEPTION_RETURN(env, 0);
-    }
-
-    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
-        return getString8859_1Chars(env, jstr);
-    if (fastEncoding == FAST_646_US)
-        return getString646_USChars(env, jstr);
-    if (fastEncoding == FAST_CP1252)
-        return getStringCp1252Chars(env, jstr);
-
     if ((*env)->EnsureLocalCapacity(env, 2) < 0)
         return 0;
 
@@ -883,6 +914,109 @@
     return result;
 }
 
+/*
+ * Optimized for charset UTF-8. A String whose coder is LATIN1 is encoded
+ * directly from its value array: bytes below 0x80 are copied, every other
+ * byte becomes a two-byte UTF-8 sequence. Strings with any other coder are
+ * delegated to getStringBytes().
+ *
+ * Returns a NUL-terminated buffer obtained from MALLOC_MIN4, or NULL on
+ * failure; an OutOfMemoryError is raised when the buffer cannot be
+ * allocated or its size would not fit in a jint.
+ */
+static const char*
+getStringUTF8(JNIEnv *env, jstring jstr)
+{
+    int i;
+    char *result;
+    jarray value;
+    jint len;
+    const jbyte *str;
+    jint extra;
+    jint rlen;
+    int ri;
+    jbyte coder = (*env)->GetByteField(env, jstr, String_coder_ID);
+    if (coder != LATIN1) {
+        return getStringBytes(env, jstr);
+    }
+    value = (jarray)(*env)->GetObjectField(env, jstr, String_value_ID);
+    if (value == NULL) {
+        return NULL;
+    }
+    len = (*env)->GetArrayLength(env, value);
+    str = (jbyte*)(*env)->GetPrimitiveArrayCritical(env, value, NULL);
+
+    if (str == NULL) {
+        return NULL;
+    }
+
+    // we need two bytes for each latin-1 char above 127 (negative jbytes)
+    extra = 0;
+    for (i = 0; i < len; i++) {
+        if (str[i] < 0) {
+            extra++;
+        }
+    }
+
+    // keep len + extra, plus headroom for the NUL, within jint range
+    if (extra > 0x7fffffff - 1 - len) {
+        (*env)->ReleasePrimitiveArrayCritical(env, value, (void *)str, JNI_ABORT);
+        JNU_ThrowOutOfMemoryError(env, 0);
+        return NULL;
+    }
+    rlen = len + extra;
+
+    result = MALLOC_MIN4(rlen);
+    if (result == NULL) {
+        // the critical section was opened on value, not on jstr
+        (*env)->ReleasePrimitiveArrayCritical(env, value, (void *)str, JNI_ABORT);
+        JNU_ThrowOutOfMemoryError(env, 0);
+        return NULL;
+    }
+
+    for (ri = 0, i = 0; i < len; i++) {
+        jbyte c = str[i];
+        if (c < 0) {
+            result[ri++] = (char)(0xc0 | ((c & 0xff) >> 6));
+            result[ri++] = (char)(0x80 | (c & 0x3f));
+        } else {
+            result[ri++] = c;
+        }
+    }
+    result[rlen] = '\0';
+    // read-only access: JNI_ABORT releases without copying anything back
+    (*env)->ReleasePrimitiveArrayCritical(env, value, (void *)str, JNI_ABORT);
+    return result;
+}
+
+/*
+ * Converts jstr to a C string, dispatching on fastEncoding (which is
+ * initialized here on first use). When isCopy is non-NULL, *isCopy is
+ * always set to JNI_TRUE.
+ */
+JNIEXPORT const char * JNICALL
+JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
+{
+
+    if (isCopy)
+        *isCopy = JNI_TRUE;
+
+    if (fastEncoding == NO_ENCODING_YET) {
+        initializeEncoding(env);
+        JNU_CHECK_EXCEPTION_RETURN(env, 0);
+    }
+
+    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
+        return getString8859_1Chars(env, jstr);
+    if (fastEncoding == FAST_646_US)
+        return getString646_USChars(env, jstr);
+    if (fastEncoding == FAST_CP1252)
+        return getStringCp1252Chars(env, jstr);
+    if (fastEncoding == FAST_UTF_8)
+        return getStringUTF8(env, jstr);
+    else
+        return getStringBytes(env, jstr);
+}
 JNIEXPORT void JNICALL
 JNU_ReleaseStringPlatformChars(JNIEnv *env, jstring jstr, const char *str)
 {
diff --git a/src/java.base/share/native/libjava/jni_util.h
b/src/java.base/share/native/libjava/jni_util.h --- a/src/java.base/share/native/libjava/jni_util.h +++ b/src/java.base/share/native/libjava/jni_util.h @@ -382,7 +382,8 @@ NO_FAST_ENCODING, /* Platform encoding is not fast */ FAST_8859_1, /* ISO-8859-1 */ FAST_CP1252, /* MS-DOS Cp1252 */ - FAST_646_US /* US-ASCII : ISO646-US */ + FAST_646_US, /* US-ASCII : ISO646-US */ + FAST_UTF_8 }; int getFastEncoding(); diff --git a/test/java/lang/String/nativeEncoding/StringPlatformChars.java b/test/java/lang/String/nativeEncoding/StringPlatformChars.java new file mode 100644 --- /dev/null +++ b/test/java/lang/String/nativeEncoding/StringPlatformChars.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + + /* + * @test + * @run main/native StringPlatformChars + */ +import java.util.Arrays; + +public class StringPlatformChars { + + private static final String JNU_ENCODING = System.getProperty("sun.jnu.encoding"); + + public static void main(String... 
args) throws Exception { + System.loadLibrary("stringPlatformChars"); + + // Test varying lengths, provoking different allocation paths + StringBuilder unicodeSb = new StringBuilder(); + StringBuilder asciiSb = new StringBuilder(); + StringBuilder latinSb = new StringBuilder(); + + for (int i = 0; i < 2000; i++) { + unicodeSb.append('\uFEFE'); + testString(unicodeSb.toString()); + + asciiSb.append('x'); + testString(asciiSb.toString()); + + latinSb.append('\u00FE'); + testString(latinSb.toString()); + + testString(latinSb.toString() + asciiSb.toString() + unicodeSb.toString()); + } + + // Exhaustively test simple Strings made up of all possible chars: + for (char c = '\u0001'; c < Character.MAX_VALUE; c++) { + testString(String.valueOf(c)); + } + // Special case: \u0000 is treated as end-of-string in the native code, + // so strings with it should be truncated: + if (getBytes("\u0000abcdef").length != 0 || + getBytes("a\u0000bcdef").length != 1) { + System.out.println("Mismatching values for strings including \\u0000"); + throw new AssertionError(); + } + } + + private static void testString(String s) throws Exception { + byte[] nativeBytes = getBytes(s); + byte[] stringBytes = s.getBytes(JNU_ENCODING); + + if (!Arrays.equals(nativeBytes, stringBytes)) { + System.out.println("Mismatching values for: '" + s + "'"); + System.out.println("Native: " + Arrays.toString(nativeBytes)); + System.out.println("String: " + Arrays.toString(stringBytes)); + throw new AssertionError(s); + } + + String javaNewS = new String(nativeBytes, JNU_ENCODING); + String nativeNewS = newString(nativeBytes); + if (!javaNewS.equals(nativeNewS)) { + System.out.println("New string via native doesn't match via java: '" + javaNewS + "' and '" + nativeNewS + "'"); + throw new AssertionError(s); + } + } + + static native byte[] getBytes(String string); + + static native String newString(byte[] bytes); +} diff --git a/test/java/lang/String/nativeEncoding/libstringPlatformChars.c 
b/test/java/lang/String/nativeEncoding/libstringPlatformChars.c
new file mode 100644
--- /dev/null
+++ b/test/java/lang/String/nativeEncoding/libstringPlatformChars.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "jni.h"
+#include "jni_util.h"
+
+/*
+ * Encodes value with JNU_GetStringPlatformChars and returns the resulting
+ * bytes, up to but not including the terminating NUL, as a new byte[].
+ * Returns NULL if the conversion or the array allocation fails.
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_StringPlatformChars_getBytes(JNIEnv *env, jclass unused, jstring value)
+{
+    const char* str;
+    int len;
+    jbyteArray bytes = NULL;
+
+    str = JNU_GetStringPlatformChars(env, value, NULL);
+    if (str == NULL) {
+        return NULL;
+    }
+    len = (int)strlen(str);
+    bytes = (*env)->NewByteArray(env, len);
+    if (bytes != NULL) {
+        (*env)->SetByteArrayRegion(env, bytes, 0, len, (jbyte *)str);
+    }
+    /* Hand the native buffer back on every path, success or failure. */
+    JNU_ReleaseStringPlatformChars(env, value, str);
+    return bytes;
+}
+
+/*
+ * Copies bytes into a NUL-terminated C string and decodes that string with
+ * JNU_NewStringPlatform. Returns NULL if a buffer cannot be obtained or if
+ * JNU_NewStringPlatform itself returns NULL.
+ */
+JNIEXPORT jstring JNICALL
+Java_StringPlatformChars_newString(JNIEnv *env, jclass unused, jbyteArray bytes)
+{
+    char* str;
+    int len = (*env)->GetArrayLength(env, bytes);
+    jbyte* jbytes;
+    jstring result;
+
+    str = (char*)malloc(len + 1);
+    if (str == NULL) {
+        JNU_ThrowOutOfMemoryError(env, 0);
+        return NULL;
+    }
+    jbytes = (*env)->GetPrimitiveArrayCritical(env, bytes, NULL);
+    if (jbytes == NULL) {
+        free(str);
+        return NULL;
+    }
+    memcpy(str, jbytes, (size_t)len);
+    str[len] = '\0';
+    /* Release the pointer the VM handed out; nothing was written to it. */
+    (*env)->ReleasePrimitiveArrayCritical(env, bytes, jbytes, JNI_ABORT);
+
+    result = JNU_NewStringPlatform(env, str);
+    free(str);
+    return result;
+}
+