# HG changeset patch
# User redestad
# Date 1497047044 -7200
#      Sat Jun 10 00:24:04 2017 +0200
# Node ID 15dae6e0d183e8b8cf128ebe0ec9b3bcab3a624a
# Parent  875fa66a13a242ec947ba309c99e2a88991dbf4f
8181147: JNI_GetStringPlatformChars should have a fast path for UTF-8
Reviewed-by: shade, chegar, erikj

diff --git a/make/test/JtregNative.gmk b/make/test/JtregNative.gmk
--- a/make/test/JtregNative.gmk
+++ b/make/test/JtregNative.gmk
@@ -44,12 +44,19 @@
 # Add more directories here when needed.
 BUILD_JDK_JTREG_NATIVE_SRC += \
     $(JDK_TOPDIR)/test/native_sanity \
+    $(JDK_TOPDIR)/test/java/lang/String/nativeEncoding \
     #
 
 BUILD_JDK_JTREG_OUTPUT_DIR := $(BUILD_OUTPUT)/support/test/jdk/jtreg/native
 
 BUILD_JDK_JTREG_IMAGE_DIR := $(TEST_IMAGE_DIR)/jdk/jtreg
 
+ifeq ($(OPENJDK_TARGET_OS), windows)
+  BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := java.lib
+else
+  BUILD_JDK_JTREG_LIBRARIES_LIBS_libstringPlatformChars := -ljava
+endif
+
 $(eval $(call SetupTestFilesCompilation, BUILD_JDK_JTREG_LIBRARIES, \
     TYPE := LIBRARY, \
     SOURCE_DIRS := $(BUILD_JDK_JTREG_NATIVE_SRC), \
diff --git a/src/java.base/share/native/include/jni.h b/src/java.base/share/native/include/jni.h
--- a/src/java.base/share/native/include/jni.h
+++ b/src/java.base/share/native/include/jni.h
@@ -611,6 +611,15 @@
       (JNIEnv *env, jstring str, jboolean *isCopy);
     void (JNICALL *ReleaseStringChars)
       (JNIEnv *env, jstring str, const jchar *chars);
+    /*
+     * NOTE(review): these two entries are inserted mid-table, which moves
+     * the slot of every function pointer declared after them; confirm that
+     * the VM-side table and existing native binaries tolerate this.
+     */
+    const jbyte *(JNICALL *GetStringBytesCritical)
+      (JNIEnv *env, jstring str, jint *len, jbyte *encoding, jboolean *isCopy);
+    void (JNICALL *ReleaseStringBytesCritical)
+      (JNIEnv *env, jstring str, const jbyte *bytes);
 
     jstring (JNICALL *NewStringUTF)
       (JNIEnv *env, const char *utf);
@@ -1611,6 +1620,13 @@
     void ReleaseStringChars(jstring str, const jchar *chars) {
         functions->ReleaseStringChars(this,str,chars);
     }
+
+    const jbyte *GetStringBytesCritical(jstring str, jint *len, jbyte *encoding, jboolean *isCopy) {
+        return functions->GetStringBytesCritical(this,str,len,encoding,isCopy);
+    }
+    void ReleaseStringBytesCritical(jstring str, const jbyte *bytes) {
+        functions->ReleaseStringBytesCritical(this,str,bytes);
+    }
 
     jstring NewStringUTF(const char *utf) {
         return functions->NewStringUTF(this,utf);
diff --git a/src/java.base/share/native/libjava/jni_util.c b/src/java.base/share/native/libjava/jni_util.c
--- a/src/java.base/share/native/libjava/jni_util.c
+++ b/src/java.base/share/native/libjava/jni_util.c
@@ -444,9 +444,8 @@
 
 /* Optimized for char set ISO_8559_1 */
 static jstring
-newString8859_1(JNIEnv *env, const char *str)
+newSizedString8859_1(JNIEnv *env, const char *str, int len)
 {
-    int len = (int)strlen(str);
     jchar buf[512];
     jchar *str1;
     jstring result;
@@ -469,6 +468,13 @@
     return result;
 }
 
+static jstring
+newString8859_1(JNIEnv *env, const char *str)
+{
+    int len = (int)strlen(str);
+    return newSizedString8859_1(env, str, len);
+}
+
 static const char*
 getString8859_1Chars(JNIEnv *env, jstring jstr)
 {
@@ -500,7 +506,6 @@
     return result;
 }
 
-
 /* Optimized for char set ISO646-US (us-ascii) */
 static jstring
 newString646_US(JNIEnv *env, const char *str)
@@ -671,6 +676,42 @@
 static jmethodID String_init_ID;        /* String(byte[], enc) */
 static jmethodID String_getBytes_ID;    /* String.getBytes(enc) */
 
+/*
+ * Optimized for char set UTF-8: pure-ASCII input is built through
+ * newSizedString8859_1(); anything else is decoded by the
+ * String(byte[], enc) constructor using jnuEncoding.
+ */
+static jstring
+newStringUTF8(JNIEnv *env, const char *str)
+{
+    jboolean isAscii = JNI_TRUE;
+    jstring result;
+    jbyteArray hab = NULL;
+    int len = 0;
+    char b;
+    for (b = str[len]; b != '\0'; len++, b = str[len]) {
+        if (isAscii && (b & 0x80)) {
+            isAscii = JNI_FALSE;
+        }
+    }
+
+    if (isAscii) {
+        return newSizedString8859_1(env, str, len);
+    }
+
+    hab = (*env)->NewByteArray(env, len);
+    if (hab != 0) {
+        jclass strClazz = JNU_ClassString(env);
+        CHECK_NULL_RETURN(strClazz, 0);
+        (*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);
+        result = (*env)->NewObject(env, strClazz,
+                                   String_init_ID, hab, jnuEncoding);
+        (*env)->DeleteLocalRef(env, hab);
+        return result;
+    }
+    return NULL;
+}
+
 int getFastEncoding() {
     return fastEncoding;
 }
@@ -718,17 +759,20 @@
                     if ((strcmp(encname, "8859_1") == 0) ||
                         (strcmp(encname, "ISO8859-1") == 0) ||
                         (strcmp(encname, "ISO8859_1") == 0) ||
-                        (strcmp(encname, "ISO-8859-1") == 0))
+                        (strcmp(encname, "ISO-8859-1") == 0)) {
                         fastEncoding = FAST_8859_1;
-                    else if (strcmp(encname, "ISO646-US") == 0)
+                    } else if (strcmp(encname, "UTF-8") == 0) {
+                        fastEncoding = FAST_UTF_8;
+                        jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
+                    } else if (strcmp(encname, "ISO646-US") == 0) {
                         fastEncoding = FAST_646_US;
-                    else if (strcmp(encname, "Cp1252") == 0 ||
+                    } else if (strcmp(encname, "Cp1252") == 0 ||
                              /* This is a temporary fix until we move */
                              /* to wide character versions of all Windows */
                              /* calls. */
-                             strcmp(encname, "utf-16le") == 0)
+                             strcmp(encname, "utf-16le") == 0) {
                         fastEncoding = FAST_CP1252;
-                    else {
+                    } else {
                         fastEncoding = NO_FAST_ENCODING;
                         jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);
                     }
@@ -792,6 +836,8 @@
         return newString646_US(env, str);
     if (fastEncoding == FAST_CP1252)
         return newStringCp1252(env, str);
+    if (fastEncoding == FAST_UTF_8)
+        return newStringUTF8(env, str);
 
     if ((*env)->EnsureLocalCapacity(env, 2) < 0)
         return NULL;
@@ -830,27 +876,10 @@
     return JNU_GetStringPlatformChars(env, jstr, isCopy);
 }
 
-JNIEXPORT const char * JNICALL
-JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
-{
+static const char* getStringBytes(JNIEnv *env, jstring jstr) {
     char *result = NULL;
     jbyteArray hab = 0;
 
-    if (isCopy)
-        *isCopy = JNI_TRUE;
-
-    if (fastEncoding == NO_ENCODING_YET) {
-        initializeEncoding(env);
-        JNU_CHECK_EXCEPTION_RETURN(env, 0);
-    }
-
-    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
-        return getString8859_1Chars(env, jstr);
-    if (fastEncoding == FAST_646_US)
-        return getString646_USChars(env, jstr);
-    if (fastEncoding == FAST_CP1252)
-        return getStringCp1252Chars(env, jstr);
-
     if ((*env)->EnsureLocalCapacity(env, 2) < 0)
         return 0;
 
@@ -883,6 +912,79 @@
     return result;
 }
 
+/*
+ * UTF-8 fast path for JNU_GetStringPlatformChars(). A string whose bytes
+ * are all 7-bit ASCII is copied as-is into a NUL-terminated buffer from
+ * MALLOC_MIN4; every other string is handed to getStringBytes(). Returns
+ * NULL if the bytes cannot be obtained or the allocation fails (the latter
+ * also throws OutOfMemoryError).
+ */
+static const char*
+getStringUTF8(JNIEnv *env, jstring jstr)
+{
+    jint i;
+    char *result;
+    jboolean ascii;
+    jbyte encoding = 0;
+    jint len = 0;
+    const jbyte *str = (*env)->GetStringBytesCritical(env, jstr, &len, &encoding, NULL);
+
+    if (str == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Only a string reported with encoding 0 (presumably the one-byte form;
+     * confirm against the VM) and no high-bit bytes is safe to copy verbatim.
+     */
+    ascii = (encoding == 0) ? JNI_TRUE : JNI_FALSE;
+    for (i = 0; ascii && i < len; i++) {
+        if (str[i] < 0) {
+            ascii = JNI_FALSE;
+        }
+    }
+
+    if (!ascii) {
+        /* Hand back the pointer that was acquired, never NULL. */
+        (*env)->ReleaseStringBytesCritical(env, jstr, str);
+        return getStringBytes(env, jstr);
+    }
+
+    result = MALLOC_MIN4(len);
+    if (result == NULL) {
+        (*env)->ReleaseStringBytesCritical(env, jstr, str);
+        JNU_ThrowOutOfMemoryError(env, 0);
+        return NULL;
+    }
+    memcpy(result, str, (size_t)len);
+    result[len] = '\0';
+    (*env)->ReleaseStringBytesCritical(env, jstr, str);
+    return result;
+}
+
+JNIEXPORT const char * JNICALL
+JNU_GetStringPlatformChars(JNIEnv *env, jstring jstr, jboolean *isCopy)
+{
+    if (isCopy)
+        *isCopy = JNI_TRUE;
+
+    if (fastEncoding == NO_ENCODING_YET) {
+        initializeEncoding(env);
+        JNU_CHECK_EXCEPTION_RETURN(env, 0);
+    }
+
+    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
+        return getString8859_1Chars(env, jstr);
+    if (fastEncoding == FAST_646_US)
+        return getString646_USChars(env, jstr);
+    if (fastEncoding == FAST_CP1252)
+        return getStringCp1252Chars(env, jstr);
+    if (fastEncoding == FAST_UTF_8)
+        return getStringUTF8(env, jstr);
+    else
+        return getStringBytes(env, jstr);
+}
+
 JNIEXPORT void JNICALL
 JNU_ReleaseStringPlatformChars(JNIEnv *env, jstring jstr, const char *str)
 {
diff --git a/src/java.base/share/native/libjava/jni_util.h b/src/java.base/share/native/libjava/jni_util.h
--- a/src/java.base/share/native/libjava/jni_util.h
+++ b/src/java.base/share/native/libjava/jni_util.h
@@ -382,7 +382,8 @@
     NO_FAST_ENCODING,           /* Platform encoding is not fast */
     FAST_8859_1,                /* ISO-8859-1 */
     FAST_CP1252,                /* MS-DOS Cp1252 */
-    FAST_646_US                 /* US-ASCII : ISO646-US */
+    FAST_646_US,                /* US-ASCII : ISO646-US */
+    FAST_UTF_8                  /* UTF-8 */
 };
 
 int getFastEncoding();