--- /dev/null 2018-12-11 14:42:27.088072338 -0500 +++ new/test/jdk/java/util/jar/Manifest/ValueUtf8Coding.java 2018-12-18 10:31:52.156559387 -0500 @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.jar.Attributes; +import java.util.jar.Attributes.Name; +import java.util.jar.Manifest; +import java.util.List; +import java.util.ArrayList; + +import org.testng.annotations.Test; +import static org.testng.Assert.*; + +/** + * @test + * @bug 8066619 + * @run testng ValueUtf8Coding + * @summary Tests encoding and decoding manifest header values to and from + * UTF-8 with the complete Unicode character set. 
+ */ /* + * see also "../tools/launcher/UnicodeTest.java" for manifest attributes + * parsed during launch + */ +public class ValueUtf8Coding { + + /** + * Maximum number of bytes of UTF-8 encoded characters in one header value. + *
+ * There are too many different Unicode code points (more than one million)
+ * to fit all into one manifest value. The specifications state:
+ *
+ * This test assumes that a manifest is encoded and decoded correctly if
+ * writing and then reading it again results in a manifest with identical
+ * values as the original. The test covers no other aspect of writing and
+ * reading manifests; it only checks that what works for some characters,
+ * such as the most widely and often used ones, works just the same for
+ * the complete Unicode character set.
+ * <p>
+ * Only header values are tested. The set of allowed characters for header
+ * names is much more limited and is a different topic entirely; only the
+ * simplest names are used here, as necessary to get valid and distinct
+ * ones (see {@link #azName}).
+ * <p>
+ * Because the current implementation under test uses different portions
+ * of code depending on where the value occurs to read or write, each
+ * character is tested in each of the three positions: main section header
+ * values, named section names, and named section header values.
+ * <p>
+ * The specifications state about supported sizes:
+ * <q>Implementations should support 65535-byte (not character) header
+ * values, and 65535 headers per file. They might run out of memory,
+ * but there should not be hard-coded limits below these values.</q>
+ *
+ * @see "JAR File Specification, Notes on Manifest and Signature Files"
+ */
+ static final int SUPPORTED_VALUE_LENGTH = 65535;
+
+ /**
+ * Returns {@code true} if {@code codePoint} is known not to be a supported
+ * character in manifest header values. The specifications explicitly
+ * exclude some characters from manifest header values by stating:
+ * <q>otherchar: any UTF-8 character except NUL, CR and LF</q>.
+ * {@code NUL} ({@code 0x0}), however, works just fine and might have been
+ * used and might still be.
+ *
+ * @see "JAR File Specification"
+ */
+ static boolean isUnsupportedManifestValueCharacter(int codePoint) {
+ // Only the two line terminators are rejected; NUL is not checked here.
+ return codePoint == '\r' /* CR */ || codePoint == '\n' /* LF */;
+ }
+
+ /**
+ * Produces a list of strings with all Unicode characters except those
+ * explicitly invalid in manifest header values.
+ * Each string is filled with as many characters as fit into
+ * {@link #SUPPORTED_VALUE_LENGTH} bytes with UTF-8 encoding except the
+ * last string which contains the remaining characters. Each of those
+ * strings becomes a header value; 65535 such header values should be
+ * supported per file.
+ *
+ * @see "JAR File Specification, Notes on Manifest and Signature Files"
+ */
+ static List
+ *
+ * Implementation of writing the main section headers in
+ * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the
+ * one writing named section headers in
+ * {@link Attributes#write(java.io.DataOutputStream)} regarding the special
+ * order of {@link Name#MANIFEST_VERSION} and
+ * {@link Name#SIGNATURE_VERSION}. Also,
+ * {@link Manifest#read(java.io.InputStream)} at least potentially reads
+ * main sections differently from how named section names and headers are
+ * read in {@link Attributes#read(Manifest.FastInputStream, byte[])}.
+ */
+ @Test
+ public void testCompleteUnicodeCharacterSet() throws IOException {
+ Manifest mf = new Manifest();
+ mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
+
+ List