--- old/make/lib/CoreLibraries.gmk 2015-07-07 17:00:13.751227600 +0300
+++ new/make/lib/CoreLibraries.gmk 2015-07-07 17:00:13.474616600 +0300
@@ -330,6 +330,7 @@
             -export:JLI_CmdToArgs \
             -export:JLI_GetStdArgc \
             -export:JLI_GetStdArgs \
+            -export:JLI_DecodeArgs \
             advapi32.lib \
             comctl32.lib \
             user32.lib, \
--- old/src/java.base/share/classes/java/io/Console.java 2015-07-07 17:00:15.557731800 +0300
+++ new/src/java.base/share/classes/java/io/Console.java 2015-07-07 17:00:15.304547500 +0300
@@ -564,7 +564,7 @@
             } catch (Exception x) {}
         }
         if (cs == null)
-            cs = Charset.defaultCharset();
+            cs = Boolean.getBoolean("windows.UnicodeConsole") ? Charset.defaultUnicodeCharset() : Charset.defaultCharset();
         out = StreamEncoder.forOutputStreamWriter(
                   new FileOutputStream(FileDescriptor.out),
                   writeLock,
--- old/src/java.base/share/classes/java/nio/charset/Charset.java 2015-07-07 17:00:17.259449600 +0300
+++ new/src/java.base/share/classes/java/nio/charset/Charset.java 2015-07-07 17:00:17.008839600 +0300
@@ -593,6 +593,7 @@
     }
 
     private static volatile Charset defaultCharset;
+    private static volatile Charset defaultUnicodeCharset;
 
     /**
      * Returns the default charset of this Java virtual machine.
@@ -620,6 +621,34 @@
         return defaultCharset;
     }
 
+    /**
+     * Returns the default unicode charset of this Java virtual machine.
+     *
+     * <p> The default unicode charset is determined on first use from the
+     * {@code file.encoding.unicode} system property. UTF-8 is used if the
+     * property is not set or does not name a supported charset.
+     *
+     * @return  A charset object for the default unicode charset
+     *
+     */
+    public static Charset defaultUnicodeCharset() {
+        Charset result = defaultUnicodeCharset;
+        if (result == null) {
+            synchronized (Charset.class) {
+                result = defaultUnicodeCharset;
+                if (result == null) {
+                    String csn = AccessController.doPrivileged(
+                        new GetPropertyAction("file.encoding.unicode"));
+                    Charset cs = (csn != null) ? lookup(csn) : null;
+                    // Publish only the final value: no thread may see the fallback first.
+                    result = (cs != null) ? cs : forName("UTF-8");
+                    defaultUnicodeCharset = result;
+                }
+            }
+        }
+        return result;
+    }
+
 
     /* -- Instance fields and methods -- */
 
--- old/src/java.base/share/classes/sun/launcher/LauncherHelper.java 2015-07-07 17:00:18.982244300 +0300
+++ new/src/java.base/share/classes/sun/launcher/LauncherHelper.java 2015-07-07 17:00:18.729944300 +0300
@@ -589,6 +589,7 @@
     }
 
     private static final String encprop = "sun.jnu.encoding";
+    private static final String encprop_unicode = "file.encoding.unicode";
     private static String encoding = null;
     private static boolean isCharsetSupported = false;
 
@@ -599,7 +600,17 @@
     static String makePlatformString(boolean printToStderr, byte[] inArray) {
         initOutput(printToStderr);
         if (encoding == null) {
-            encoding = System.getProperty(encprop);
+            if (Boolean.getBoolean("windows.UnicodeConsole")) {
+                encoding = System.getProperty(encprop_unicode);
+                if (encoding == null || !Charset.isSupported(encoding)) {
+                    encoding = Charset.defaultUnicodeCharset().name();
+                }
+                if (!Charset.isSupported(encoding)) {
+                    encoding = System.getProperty(encprop);
+                }
+            } else {
+                encoding = System.getProperty(encprop);
+            }
             isCharsetSupported = Charset.isSupported(encoding);
         }
         try {
--- old/src/java.base/share/native/launcher/main.c 2015-07-07 17:00:20.634898000 +0300
+++ new/src/java.base/share/native/launcher/main.c 2015-07-07 17:00:20.396757800 +0300
@@ -106,17 +106,12 @@
             }
         }
     }
-    JLI_CmdToArgs(GetCommandLine());
-    margc = JLI_GetStdArgc();
-    // add one more to mark the end
-    margv = (char **)JLI_MemAlloc((margc + 1) * (sizeof(char *)));
-    {
-        int i = 0;
-        StdArg *stdargs = JLI_GetStdArgs();
-        for (i = 0 ; i < margc ; i++) {
-            margv[i] = stdargs[i].arg;
-        }
-        margv[i] = NULL;
+
+    // On Windows the command line is decoded from its Unicode form only if
+    // that is requested explicitly (-Dwindows.UnicodeConsole=true); otherwise
+    // JLI_DecodeArgs parses the GetCommandLine() string as before.
+    if (!JLI_DecodeArgs(&margc, &margv)) {
+        exit(1);
     }
 #else /* *NIXES */
     margc = argc;
--- old/src/java.base/share/native/libjli/jli_util.h 2015-07-07 17:00:22.269086200 +0300
+++ new/src/java.base/share/native/libjli/jli_util.h 2015-07-07 17:00:22.032350400 +0300
@@ -43,8 +43,9 @@
     jboolean has_wildcard;
 } StdArg;
 
-StdArg *JLI_GetStdArgs();
 int     JLI_GetStdArgc();
+StdArg *JLI_GetStdArgs();
+void    JLI_ReleaseStdArgs();
 
 #define JLI_StrLen(p1)          strlen((p1))
 #define JLI_StrChr(p1, p2)      strchr((p1), (p2))
@@ -69,6 +70,7 @@
 #define JLI_StrNCaseCmp(p1, p2, p3) strnicmp((p1), (p2), (p3))
 int  JLI_Snprintf(char *buffer, size_t size, const char *format, ...);
 void JLI_CmdToArgs(char *cmdline);
+jboolean JLI_DecodeArgs(int *pargc, char*** pargv);
 #define JLI_Lseek                       _lseeki64
 #define JLI_PutEnv                      _putenv
 #define JLI_GetPid                      _getpid
--- old/src/java.base/unix/native/libjli/java_md_common.c 2015-07-07 17:00:23.891177700 +0300
+++ new/src/java.base/unix/native/libjli/java_md_common.c 2015-07-07 17:00:23.650287300 +0300
@@ -314,8 +314,18 @@
     return 0;
 }
 
+void
+JLI_ReleaseStdArgs()
+{
+}
+
 jobjectArray
 CreateApplicationArgs(JNIEnv *env, char **strv, int argc)
 {
     return NewPlatformStringArray(env, strv, argc);
 }
+
+jboolean
+JLI_DecodeArgs(int *pargc, char*** pargv) {
+    return JNI_TRUE;    /* no-op: *pargc and *pargv are left untouched */
+}
--- old/src/java.base/windows/native/libjava/java_props_md.c 2015-07-07 17:00:25.519628200 +0300
+++ new/src/java.base/windows/native/libjava/java_props_md.c 2015-07-07 17:00:25.281398400 +0300
@@ -145,6 +145,8 @@
     cp = GetConsoleCP();
     if (cp >= 874 && cp <= 950)
         sprintf(buf, "ms%d", cp);
+    else if (cp == 65001)           /* 65001 is the Windows UTF-8 code page */
+        sprintf(buf, "UTF-8");
     else
         sprintf(buf, "cp%d", cp);
     return buf;
@@ -688,17 +690,27 @@
             }
 
             hStdOutErr = GetStdHandle(STD_OUTPUT_HANDLE);
-            if (hStdOutErr != INVALID_HANDLE_VALUE &&
-                GetFileType(hStdOutErr) == FILE_TYPE_CHAR) {
-                sprops.sun_stdout_encoding = getConsoleEncoding();
+            if (hStdOutErr != INVALID_HANDLE_VALUE) {
+                switch (GetFileType(hStdOutErr)) {
+                case FILE_TYPE_DISK:
+                case FILE_TYPE_CHAR:
+                case FILE_TYPE_PIPE:
+                    sprops.sun_stdout_encoding = getConsoleEncoding();
+                    break;
+                }
             }
             hStdOutErr = GetStdHandle(STD_ERROR_HANDLE);
-            if (hStdOutErr != INVALID_HANDLE_VALUE &&
-                GetFileType(hStdOutErr) == FILE_TYPE_CHAR) {
-                if (sprops.sun_stdout_encoding != NULL)
-                    sprops.sun_stderr_encoding = sprops.sun_stdout_encoding;
-                else
-                    sprops.sun_stderr_encoding = getConsoleEncoding();
+            if (hStdOutErr != INVALID_HANDLE_VALUE) {
+                switch (GetFileType(hStdOutErr)) {
+                case FILE_TYPE_DISK:
+                case FILE_TYPE_CHAR:
+                case FILE_TYPE_PIPE:
+                    if (sprops.sun_stdout_encoding != NULL)
+                        sprops.sun_stderr_encoding = sprops.sun_stdout_encoding;
+                    else
+                        sprops.sun_stderr_encoding = getConsoleEncoding();
+                    break;
+                }
             }
         }
     }
--- old/src/java.base/windows/native/libjli/cmdtoargs.c 2015-07-07 17:00:27.157709900 +0300
+++ new/src/java.base/windows/native/libjli/cmdtoargs.c 2015-07-07 17:00:26.913008400 +0300
@@ -193,6 +193,16 @@
     return stdargs;
 }
 
+void JLI_ReleaseStdArgs() {
+    StdArg* stdarg;
+    for (stdarg = stdargs; stdarg < stdargs + stdargc; ++stdarg) {
+        JLI_MemFree(stdarg->arg);
+    }
+    JLI_MemFree(stdargs);
+    stdargs = NULL;     /* reset the parsed state before JLI_CmdToArgs runs again */
+    stdargc = 0;
+}
+
 void JLI_CmdToArgs(char* cmdline) {
     int nargs = 0;
     StdArg* argv = NULL;
--- old/src/java.base/windows/native/libjli/java_md.c 2015-07-07 17:00:28.788745700 +0300
+++ new/src/java.base/windows/native/libjli/java_md.c 2015-07-07 17:00:28.548276600 +0300
@@ -1013,3 +1013,114 @@
     JLI_MemFree(filteredargs);
     return outArray;
 }
+
+/*
+ * Checks whether the command line explicitly requests Unicode decoding.
+ */
+static jboolean
+IsArgsUnicodeDecodingRequired(int argc, char **argv) {
+    int i;
+
+    /*
+     * argv[0] is the launcher; only the options preceding the first
+     * non-option token (main class or jar file) are examined.
+     */
+    for (i = 1; i < argc && argv[i][0] == '-'; ++i) {
+        const char *arg = argv[i];
+        if (stricmp(arg, "-Dwindows.UnicodeConsole=true") == 0) {
+            return JNI_TRUE;
+        }
+        /* -cp/-classpath carry their value in the next token: skip it. */
+        if (JLI_StrCmp(arg, "-cp") == 0 || JLI_StrCmp(arg, "-classpath") == 0) {
+            ++i;
+        }
+    }
+    return JNI_FALSE;
+}
+
+/*
+ * Converts the UTF-16 command line to UTF-8 in a JLI_MemAlloc'ed buffer
+ * that is stored in *cmd_line_utf8 and released by the caller.
+ * *cmd_line_utf8 is left NULL whenever JNI_FALSE is returned.
+ */
+static jboolean
+DecodeUnicodeArgs(LPWSTR cmd_line_wide, LPSTR* cmd_line_utf8) {
+    int cmd_line_utf8_length;
+
+    // Make sure the caller never sees an uninitialized pointer on failure.
+    *cmd_line_utf8 = NULL;
+
+    // First call to WideCharToMultiByte calculates destination buffer length
+    // in bytes; the terminating NUL is included because the input length is -1.
+    cmd_line_utf8_length = WideCharToMultiByte(CP_UTF8, 0,
+                                               cmd_line_wide, -1,
+                                               NULL, 0,
+                                               NULL, NULL);
+    if (!cmd_line_utf8_length) {
+        JLI_ReportErrorMessage(
+            "WideCharToMultiByte failed to calculate destination buffer length "
+            "with error code %lu", (unsigned long)GetLastError());
+        return JNI_FALSE;
+    }
+
+    // Allocate buffer to receive conversion to UTF-8.
+    *cmd_line_utf8 = JLI_MemAlloc((size_t)cmd_line_utf8_length * sizeof(CHAR));
+
+    // Second call to WideCharToMultiByte does the actual conversion. Note that
+    // we are forcing the encoding to UTF-8. This only works right if
+    // getEncodingInternal in java_props_md.c also returns UTF-8.
+    if (!WideCharToMultiByte(CP_UTF8, 0,
+                             cmd_line_wide, -1,
+                             *cmd_line_utf8, cmd_line_utf8_length,
+                             NULL, NULL)) {
+        JLI_ReportErrorMessage(
+            "WideCharToMultiByte failed to convert to UTF-8 "
+            "with error code %lu", (unsigned long)GetLastError());
+        JLI_MemFree(*cmd_line_utf8);
+        *cmd_line_utf8 = NULL;  /* never hand back a dangling pointer */
+        return JNI_FALSE;
+    }
+    return JNI_TRUE;
+}
+
+/*
+ * Translate command line arguments from Windows format to argc+argv.
+ */
+static void
+ConvertWinArgsToCommonFormat(LPSTR cmd_line, int *pargc, char*** pargv) {
+    int idx, count;
+    char **vec;
+    StdArg *parsed;
+
+    JLI_CmdToArgs(cmd_line);
+    count = JLI_GetStdArgc();
+    // reserve one extra slot for the terminating NULL
+    vec = (char **)JLI_MemAlloc((count + 1) * (sizeof(char *)));
+    parsed = JLI_GetStdArgs();
+    for (idx = 0; idx < count; ++idx) {
+        vec[idx] = parsed[idx].arg;
+    }
+    vec[count] = NULL;
+    *pargc = count;
+    *pargv = vec;
+}
+
+jboolean
+JLI_DecodeArgs(int *pargc, char*** pargv) {
+    LPSTR utf8CmdLine;
+
+    // First pass: split the UTF-8 form to look for the Unicode request.
+    if (DecodeUnicodeArgs(GetCommandLineW(), &utf8CmdLine) == JNI_FALSE) {
+        return JNI_FALSE;
+    }
+    ConvertWinArgsToCommonFormat(utf8CmdLine, pargc, pargv);
+    JLI_MemFree(utf8CmdLine);
+    if (IsArgsUnicodeDecodingRequired(*pargc, *pargv)) {
+        return JNI_TRUE;
+    }
+    // No request found: discard the first pass, split GetCommandLine() instead.
+    JLI_MemFree(*pargv);
+    JLI_ReleaseStdArgs();
+    ConvertWinArgsToCommonFormat(GetCommandLine(), pargc, pargv);
+    return JNI_TRUE;
+}
--- /dev/null 2015-07-07 17:00:30.000000000 +0300
+++ new/test/tools/launcher/UnicodeCmdTest.sh 2015-07-07 17:00:30.136537100 +0300
@@ -0,0 +1,76 @@
+#
+# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+
+# @test
+# @bug 8124977
+# @summary Test 8124977 cmdline encoding challenges on Windows
+# @run shell UnicodeCmdTest.sh
+
+OS=`uname -s`
+case "$OS" in
+  Windows* | CYGWIN* )
+    ;;
+  * )
+    echo "Only testing on Windows"
+    exit 0
+    ;;
+esac
+
+if [ "x${TESTSRC}" = "x" ]; then
+  echo "TESTSRC not set. Test cannot execute. Failed."
+  exit 1
+fi
+
+if [ "x${TESTJAVA}" = "x" ]; then
+  echo "TESTJAVA not set. Test cannot execute. Failed."
+  exit 1
+fi
+
+if [ "x${COMPILEJAVA}" = "x" ]; then
+  COMPILEJAVA="${TESTJAVA}"
+fi
+
+JAVA="${TESTJAVA}/bin/java"
+JAVAC="${COMPILEJAVA}/bin/javac"
+
+cp -f "${TESTSRC}/UnicodeCmdVerifier.java" .
+
+# compile the class ourselves, so this can run as a standalone test
+
+"${JAVAC}" UnicodeCmdVerifier.java
+RES="$?"
+if [ "${RES}" != 0 ]; then
+    echo 'FAIL: Cannot compile UnicodeCmdVerifier.java'
+    exit ${RES}
+fi
+# TESTVMOPTS stays unquoted on purpose: it may hold several separate options.
+"${JAVA}" ${TESTVMOPTS} -Dwindows.UnicodeConsole=true -Dfile.encoding.unicode="UTF-8" UnicodeCmdVerifier Юникод
+
+RES="$?"
+if [ "${RES}" != 0 ]; then
+    echo 'FAIL: UnicodeCmdVerifier failed with '${RES}
+    exit ${RES}
+fi
+
+echo 'PASS: UnicodeCmdVerifier works as expected'
--- /dev/null 2015-07-07 17:00:31.000000000 +0300
+++ new/test/tools/launcher/UnicodeCmdVerifier.java 2015-07-07 17:00:31.388737200 +0300
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * This class ensures that the command line argument, passed as Unicode,
+ * matches the desired value. Exit status: 0 on a match, 1 if no argument
+ * was passed, 2 on a mismatch.
+ * This is used by UnicodeCmdTest.sh
+ */
+class UnicodeCmdVerifier {
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            System.out.println("No argument was passed to UnicodeCmdVerifier");
+            System.exit(1);
+        }
+        String desired = "\u042e\u043d\u0438\u043a\u043e\u0434";
+        if (args[0].equals(desired)) {
+            System.out.println("The argument matches the desired text: " + args[0]);
+        } else {
+            System.out.println("The argument does not match the desired text: " +
+                    args[0] + " != " + desired);
+            System.exit(2);
+        }
+    }
+}