--- old/src/java.base/share/classes/java/util/jar/Attributes.java 2020-02-10 14:32:11.000000000 -0500 +++ new/src/java.base/share/classes/java/util/jar/Attributes.java 2020-02-10 14:32:11.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -301,7 +301,6 @@ /* * Writes the current attributes to the specified data output stream. - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ void write(DataOutputStream out) throws IOException { StringBuilder buffer = new StringBuilder(72); @@ -319,8 +318,6 @@ * Writes the current attributes to the specified data output stream, * make sure to write out the MANIFEST_VERSION or SIGNATURE_VERSION * attributes first. - * - * XXX Need to handle UTF8 values and break up lines longer than 72 bytes */ void writeMain(DataOutputStream out) throws IOException { StringBuilder buffer = new StringBuilder(72); --- old/src/java.base/share/classes/java/util/jar/Manifest.java 2020-02-10 14:32:13.000000000 -0500 +++ new/src/java.base/share/classes/java/util/jar/Manifest.java 2020-02-10 14:32:13.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.text.BreakIterator; import java.util.HashMap; import java.util.Map; @@ -230,25 +231,74 @@ } /** + * Returns {@code true} if the passed byte as parameter {@code b} + * is not the first (or only) byte of a Unicode character encoded in UTF-8 + * and {@code false} otherwise. + * + * @see + * RFC 3629 - UTF-8, a transformation format of ISO 10646 + * @see StringCoding#isNotContinuation(int) + * @see sun.nio.cs.UTF_8.Decoder#isNotContinuation(int) + */ + private static boolean isContinuation(byte b) { + return (b & 0xc0) == 0x80; + } + + /** * Writes {@code line} to {@code out} with line breaks and continuation - * spaces within the limits of 72 bytes of contents per line followed - * by a line break. + * spaces within the limits of 72 bytes of contents per line + * keeping byte sequences of characters encoded in UTF-8 together + * also if the same character is encoded with more than one byte or + * consists of a character sequence containing combining diacritical marks + * followed by a line break. + *

+ * Combining diacritical marks may be separated from the associated base + * character or other combining diacritical marks of that base character + * by a continuation line break ("{@code \r\n }") if the whole sequence of + * base character and all the combining diacritical marks belonging to it + * exceed 71 bytes in their binary form encoded with UTF-8. This limit is + * only 71 bytes rather than 72 because continuation lines start with a + * space that uses the first byte of the 72 bytes each line can hold up to + * and the first line provides even less space for the value because it + * starts with the name. */ static void println72(OutputStream out, String line) throws IOException { - if (!line.isEmpty()) { - byte[] lineBytes = line.getBytes(UTF_8.INSTANCE); - int length = lineBytes.length; - // first line can hold one byte more than subsequent lines which - // start with a continuation line break space - out.write(lineBytes[0]); - int pos = 1; - while (length - pos > 71) { - out.write(lineBytes, pos, 71); - pos += 71; + int linePos = 0; // number of bytes already put out on current line + BreakIterator boundary = BreakIterator.getCharacterInstance(); + boundary.setText(line); + int start = boundary.first(), end; + while ((end = boundary.next()) != BreakIterator.DONE) { + String character = line.substring(start, end); + start = end; + byte[] characterBytes = character.getBytes(UTF_8.INSTANCE); + int characterLength = characterBytes.length; + // Put out a break onto a new line if the character does not fit on + // the current line anymore but fits on a whole new line. + // In other words, if the current character does not fit on one + // whole line alone, fill the current line first before breaking + // inside of the character onto a new line. + if (linePos + characterLength > 72 && characterLength < 72) { + println(out); + out.write(' '); + linePos = 1; + } + int characterPos = 0; // number of bytes of current character + // already put out + while (linePos + characterLength - characterPos > 72) { + int nextBreakPos = 72 - linePos; + while (isContinuation( + characterBytes[characterPos + nextBreakPos])) { + nextBreakPos--; + } + out.write(characterBytes, characterPos, nextBreakPos); + characterPos += nextBreakPos; println(out); out.write(' '); + linePos = 1; } - out.write(lineBytes, pos, length - pos); + out.write(characterBytes, + characterPos, characterLength - characterPos); + linePos += characterLength - characterPos; } println(out); } --- old/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java 2020-02-10 14:32:14.000000000 -0500 +++ new/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java 2020-02-10 14:32:14.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,15 +29,25 @@ * @library /test/lib */ +import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.jar.JarFile; +import java.util.Arrays; +import java.util.stream.Collectors; import java.util.jar.Attributes.Name; +import java.util.jar.Manifest; +import java.util.jar.JarFile; import java.util.jar.JarEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipEntry; +import static java.util.jar.JarFile.MANIFEST_NAME; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES; +import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import jdk.test.lib.SecurityTools; import jdk.test.lib.util.JarUtils; @@ -46,7 +56,7 @@ /** * this name will break across lines in MANIFEST.MF at the - * middle of a two-byte utf-8 encoded character due to its e acute letter + * middle of a two-byte UTF-8 encoded character due to its e acute letter * at its exact position. * * because no file with such a name exists {@link JarUtils} will add the @@ -63,53 +73,58 @@ static final String anotherName = "LineBrokenMultiByteCharacterA1234567890B1234567890C123456789D1234567890.class"; - static final String alias = "a"; - static final String keystoreFileName = "test.jks"; - static final String manifestFileName = "MANIFEST.MF"; + static final String ALIAS = "A"; + static final String KEYSTORE_FILENAME = "test.jks"; + static final String SOME_OTHER_SIG_FILE = "META-INF/FAKE_SIG.DSA"; public static void main(String[] args) throws Exception { prepare(); testSignJar("test.jar"); - testSignJarNoManifest("test-no-manifest.jar"); testSignJarUpdate("test-update.jar", "test-updated.jar"); } static void prepare() throws Exception { - SecurityTools.keytool("-keystore", keystoreFileName, "-genkeypair", + SecurityTools.keytool("-keystore", KEYSTORE_FILENAME, "-genkeypair", "-keyalg", "dsa", "-storepass", "changeit", "-keypass", "changeit", "-storetype", - "JKS", "-alias", alias, "-dname", "CN=X", "-validity", "366") + "JKS", "-alias", ALIAS, "-dname", "CN=X", "-validity", "366") .shouldHaveExitValue(0); - Files.write(Paths.get(manifestFileName), (Name. + new File(MANIFEST_NAME).getParentFile().mkdirs(); + Files.write(Paths.get(MANIFEST_NAME), (Name. MANIFEST_VERSION.toString() + ": 1.0\r\n").getBytes(UTF_8)); - } - static void testSignJar(String jarFileName) throws Exception { - JarUtils.createJar(jarFileName, manifestFileName, testClassName); - verifyJarSignature(jarFileName); + // prevent jarsigner from assuming it was safe to rewrite the manifest + // and its line breaks assuming there were no other signatures present + Files.write(Paths.get(SOME_OTHER_SIG_FILE), new byte[] {}); } - static void testSignJarNoManifest(String jarFileName) throws Exception { - JarUtils.createJar(jarFileName, testClassName); + static void testSignJar(String jarFileName) throws Exception { + JarUtils.createJar(jarFileName, testClassName, SOME_OTHER_SIG_FILE); + createManifestEntries(jarFileName); + rebreakManifest72bytes(jarFileName); verifyJarSignature(jarFileName); } static void testSignJarUpdate( String initialFileName, String updatedFileName) throws Exception { - JarUtils.createJar(initialFileName, manifestFileName, anotherName); - SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype", + JarUtils.createJar(initialFileName, testClassName, anotherName, + SOME_OTHER_SIG_FILE); + createManifestEntries(initialFileName); + rebreakManifest72bytes(initialFileName); + removeJarEntry(initialFileName, testClassName); + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype", "JKS", "-storepass", "changeit", "-debug", initialFileName, - alias).shouldHaveExitValue(0); + ALIAS).shouldHaveExitValue(0); JarUtils.updateJar(initialFileName, updatedFileName, testClassName); verifyJarSignature(updatedFileName); } static void verifyJarSignature(String jarFileName) throws Exception { // actually sign the jar - SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype", - "JKS", "-storepass", "changeit", "-debug", jarFileName, alias) + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype", + "JKS", "-storepass", "changeit", "-debug", jarFileName, ALIAS) .shouldHaveExitValue(0); try ( @@ -130,7 +145,7 @@ * the signature file does not even contain the desired entry at all. * * this relies on {@link java.util.jar.Manifest} breaking lines unaware - * of bytes that belong to the same multi-byte utf characters. + * of bytes that belong to the same multi-byte UTF-8 encoded characters. */ static void verifyClassNameLineBroken(JarFile jar, String className) throws IOException { @@ -142,7 +157,7 @@ throw new AssertionError(className + " not found in manifest"); } - JarEntry manifestEntry = jar.getJarEntry(JarFile.MANIFEST_NAME); + JarEntry manifestEntry = jar.getJarEntry(MANIFEST_NAME); try ( InputStream manifestIs = jar.getInputStream(manifestEntry); ) { @@ -159,7 +174,7 @@ } if (bytesMatched < eAcuteBroken.length) { throw new AssertionError("self-test failed: multi-byte " - + "utf-8 character not broken across lines"); + + "UTF-8 encoded character not broken across lines"); } } } @@ -183,4 +198,108 @@ } } + static void createManifestEntries(String jarFileName) throws Exception { + JarUtils.updateJarFile(Paths.get(jarFileName), + Paths.get("."), Paths.get(MANIFEST_NAME)); + SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, + "-storepass", "changeit", "-debug", jarFileName, ALIAS) + .shouldHaveExitValue(0); + // remove the signature files, only manifest is used + removeJarEntry(jarFileName, + "META-INF/" + ALIAS + ".SF", + "META-INF/" + ALIAS + ".DSA"); + } + + @SuppressWarnings("deprecation") + static void removeJarEntry(String jarFileName, String... entryNames) + throws IOException { + String aCopy = "swap-" + jarFileName; + JarUtils.updateJar(jarFileName, aCopy, Arrays.asList(entryNames) + .stream().collect(Collectors.toMap(e -> e, e -> false))); + Files.copy(Paths.get(aCopy), Paths.get(jarFileName), + COPY_ATTRIBUTES, REPLACE_EXISTING); + Files.delete(Paths.get(aCopy)); + } + + static void rebreakManifest72bytes(String jarFileName) throws Exception { + byte[] manifest; + try (ZipFile zip = new ZipFile(jarFileName)) { + ZipEntry zipEntry = zip.getEntry(MANIFEST_NAME); + manifest = zip.getInputStream(zipEntry).readAllBytes(); + } + Utils.echoManifest(manifest, MANIFEST_NAME + " before re-break:"); + byte[] manifest72 = rebreak72bytes(manifest); + Utils.echoManifest(manifest72, MANIFEST_NAME + " after re-break:"); + String aCopy = "swap-" + jarFileName; + JarUtils.updateManifest(jarFileName, aCopy, new Manifest() { @Override + public void write(OutputStream out) throws IOException { + out.write(manifest72); + } + }); + Files.copy(Paths.get(aCopy), Paths.get(jarFileName), + COPY_ATTRIBUTES, REPLACE_EXISTING); + Files.delete(Paths.get(aCopy)); + } + + /** + * Simulates a jar manifest as it would have been created by an earlier + * JDK by re-arranging the line break at exactly 72 bytes content thereby + * breaking the multi-byte UTF-8 encoded character under test like before + * resolution of bug 6202130. + *

+ * The returned manifest is accepted as unmodified by + * {@link jdk.security.jarsigner.JarSigner#updateDigests + * (ZipEntry,ZipFile,MessageDigest[],Manifest)} on line 985: + *

+     * if (!mfDigest.equalsIgnoreCase(base64Digests[i])) {
+     * 
+ * and therefore left unchanged when the jar is signed and also signature + * verification will check it. + */ + static byte[] rebreak72bytes(byte[] mf0) { + byte[] mf1 = new byte[mf0.length]; + int c0 = 0, c1 = 0; // bytes since last line start + for (int i0 = 0, i1 = 0; i0 < mf0.length; i0++, i1++) { + switch (mf0[i0]) { + case '\r': + if (i0 + 2 < mf0.length && + mf0[i0 + 1] == '\n' && mf0[i0 + 2] == ' ') { + // skip line break + i0 += 2; + i1 -= 1; + } else { + mf1[i1] = mf0[i0]; + c0 = c1 = 0; + } + break; + case '\n': + if (i0 + 1 < mf0.length && mf0[i0 + 1] == ' ') { + // skip line break + i0 += 1; + i1 -= 1; + } else { + mf1[i1] = mf0[i0]; + c0 = c1 = 0; + } + break; + case ' ': + if (c0 == 0) { + continue; + } + default: + c0++; + if (c1 == 72) { + mf1[i1++] = '\r'; + mf1[i1++] = '\n'; + mf1[i1++] = ' '; + c1 = 1; + } else { + c1++; + } + mf1[i1] = mf0[i0]; + } + } + return mf1; + } + } --- /dev/null 2020-02-10 14:32:15.000000000 -0500 +++ new/test/jdk/java/util/jar/Manifest/LineBreakCharacter.java 2020-02-10 14:32:15.000000000 -0500 @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.jar.Attributes; +import java.util.jar.Manifest; +import java.util.jar.Attributes.Name; +import java.util.List; +import java.util.LinkedList; + +import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; +import static org.testng.Assert.*; + +/** + * @test + * @bug 6443578 6202130 + * @run testng LineBreakCharacter + * @summary Tests breaking manifest header values across lines in conjunction + * with Unicode characters encoded in UTF-8 with a variable number of bytes + * when reading and writing jar manifests results in valid UTF-8. + *

+ * The manifest line length limit (72 bytes) may be reached at a position + * between multiple bytes of a single UTF-8 encoded character. Although + * characters should not be broken across lines according to the specification + * the previous Manifest implementation did. + *

+ * This test makes sure that no character is broken apart across a line break + * when writing manifests and also that manifests are still read correctly + * whether or not characters encoded in UTF-8 with more than one byte are + * interrupted with and continued after a line break for compatibility when + * reading older manifests. + */ +public class LineBreakCharacter { + + static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72; + + /** + * Character string that has one byte size in its UTF-8 encoded form to + * yield one byte of position offset. + */ + static final String FILL1BYTE = "x"; + static final String MARK_BEFORE = "y"; + static final String MARK_AFTER = "z"; + + /** + * Four byte name. + * By using header names of four characters length the same values can be + * used for testing line breaks in both headers (in main attributes as well + * as named sections) as well as section names because a named section name + * is represented basically like any other header but follows an empty line + * and the key is always "Name". + * Relative to the start of the value, this way the same offset to the + * character to test breaking can be used in all cases. + */ + static final String FOUR_BYTE_NAME = "Name"; + + /** + * Distinguishes main attributes headers, section names, and headers in + * named sections because an implementation might make a difference. + */ + enum PositionInManifest { + /** + * @see Attributes#writeMain + */ + MAIN_ATTRIBUTES, + /** + * @see Attributes#write + */ + SECTION_NAME, + /** + * @see Manifest#write + */ + NAMED_SECTION; + } + + static String numByteUnicodeCharacter(int numBytes) { + String string; + switch (numBytes) { + case 1: string = "i"; break; + case 2: string = "\u00EF"; break; // small letter i with diaresis + case 3: string = "\uFB00"; break; // small double f ligature + case 4: string = Character.toString(0x2070E); break; // ? + default: throw new RuntimeException(); + } + assertEquals(string.getBytes(UTF_8).length, numBytes, + "self-test failed: unexpected UTF-8 encoded character length"); + return string; + } + + /** + * Produces test cases with all combinations of circumstances covered in + * which a character could possibly be attempted to be broken across a line + * break onto a continuation line:

+ * The same set of test parameters is used to write and read manifests + * once without breaking characters apart + * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int, + * PositionInManifest, String, String)}) and once with doing so + * ({@link #readCharactersBrokenAcrossLines(int, int, int, int, + * PositionInManifest, String, String)}). + * The latter case covers backwards compatibility and involves writing + * manifests like they were written before resolution of bug 6443578. + */ + @DataProvider(name = "lineBreakParameters") + public static Object[][] lineBreakParameters() { + LinkedList params = new LinkedList<>(); + + // b: number of line breaks before character under test + for (int b = 0; b <= 3; b++) { + + // c: unicode character UTF-8 encoded length in bytes + for (int c = 1; c <= 4; c++) { + + // p: potential break position offset in bytes + // p == 0 => before character, + // p == c => after character, and + // 0 < p < c => character potentially broken across line break + // within the character + for (int p = c; p >= 0; p--) { + + // a: no or one character following the one under test + // (a == 0 meaning the character under test is the end of + // the value which is followed by a line break in the + // resulting manifest without continuation line space which + // concludes the value) + for (int a = 0; a <= 1; a++) { + + // offset: so many characters (actually bytes here, + // filled with one byte characters) are needed to place + // the next character (the character under test) into a + // position relative to the maximum line width that it + // may or may not have to be broken onto the next line + int offset = + // number of lines; - 1 due to continuation " " + b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1) + // line length minus "Name: ".length() + + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6 + // position of maximum line width relative to + // beginning of encoded character + - p; + String value = ""; + for (int i = 0; i < offset - 1; i++) { + value += FILL1BYTE; + } + // character before the one to test the break + value += MARK_BEFORE; + String character = numByteUnicodeCharacter(c); + value += character; + for (int i = 0; i < a; i++) { + // character after the one to test the break + value += MARK_AFTER; + } + + for (PositionInManifest i : + PositionInManifest.values()) { + + params.add(new Object[] { + b, c, p, a, i, character, value}); + } + } + } + } + } + + return params.toArray(new Object[][] {{}}); + } + + /** + * Checks that unicode characters work well with line breaks and + * continuation lines in jar manifests without breaking a character across + * a line break even when encoded in UTF-8 with more than one byte. + *

+ * For each of the cases provided by {@link #lineBreakParameters()} the + * break position is verified in the written manifest binary form as well + * as verified that it restores to the original values when read again. + *

+ * As an additional check, the binary manifests are decoded from UTF-8 + * into Strings before re-joining continued lines. + */ + @Test(dataProvider = "lineBreakParameters") + public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p, + int a, PositionInManifest i, String character, String value) + throws IOException { + byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value); + + // in order to unambiguously establish the position of "character" in + // brokenPart, brokenPart is prepended and appended with what is + // expected before and after it... + String brokenPart = MARK_BEFORE; + + // expect the whole character on the next line unless it fits + // completely on the current line + boolean breakExpected = p < c; + if (breakExpected) { + brokenPart += "\r\n "; + } + brokenPart += character; + // expect a line break before the next character if there is a next + // character and the previous not already broken on next line + if (a > 0) { + if (!breakExpected) { + brokenPart += "\r\n "; + } + brokenPart += MARK_AFTER; + } + brokenPart = brokenPart + "\r\n"; + try { + assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8)); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); + decodeManifestFromUTF8AndAssertHeaderValue( + mfBytes, FOUR_BYTE_NAME, value, true); + } catch (AssertionError e) { + System.out.println("-".repeat(72)); + System.out.print(new String(mfBytes, UTF_8)); + System.out.println("-".repeat(72)); + throw e; + } + } + + static byte[] writeManifest(PositionInManifest i, String name, + String value) throws IOException { + Manifest mf = new Manifest(); + mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); + Attributes attributes = new Attributes(); + + switch (i) { + case MAIN_ATTRIBUTES: + mf.getMainAttributes().put(new Name(name), value); + break; + case SECTION_NAME: + mf.getEntries().put(value, attributes); + break; + case NAMED_SECTION: + mf.getEntries().put(FOUR_BYTE_NAME, attributes); + attributes.put(new Name(name), value); + break; + } + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mf.write(out); + return out.toByteArray(); + } + + /** + * Asserts one and only one occurrence of a sequence of bytes {@code part} + * representing the character and how it is expected to be broken and its + * surrounding bytes in a larger sequence that corresponds to the manifest + * in binary form {@code mf}. + */ + static void assertOccurrence(byte[] mf, byte[] part) { + List matchPos = new LinkedList<>(); + for (int i = 0; i < mf.length; i++) { + for (int j = 0; j < part.length && i + j <= mf.length; j++) { + if (part[j] == 0) { + if (i + j != mf.length) { + break; // expected eof not found + } + } else if (i + j == mf.length) { + break; + } else if (mf[i + j] != part[j]) { + break; + } + if (j == part.length - 1) { + matchPos.add(i); + } + } + } + assertEquals(matchPos.size(), 1, "not " + + (matchPos.size() < 1 ? "found" : "unique") + ": '" + + new String(part, UTF_8) + "'"); + } + + static void readManifestAndAssertValue( + byte[] mfBytes, PositionInManifest i, String name, String value) + throws IOException { + Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes)); + + switch (i) { + case MAIN_ATTRIBUTES: + assertEquals(mf.getMainAttributes().getValue(name), value, + "main attributes header value"); + break; + case SECTION_NAME: + Attributes attributes = mf.getAttributes(value); + assertNotNull(attributes, "named section not found"); + break; + case NAMED_SECTION: + attributes = mf.getAttributes(FOUR_BYTE_NAME); + assertEquals(attributes.getValue(name), value, + "named section attributes header value"); + break; + } + } + + /** + * Decodes a binary manifest {@code mfBytes} into UTF-8 first, before + * joining the continuation lines unlike {@link Manifest} and + * {@link Attributes} which join the continuation lines first, before + * decoding the joined line from UTF-8 into a {@link String}, indicating + * the binary manifest is valid UTF-8. + */ + static void decodeManifestFromUTF8AndAssertHeaderValue( + byte[] mfBytes, String name, String value, + boolean validUTF8ManifestExpected) throws IOException { + String mf = new String(mfBytes, UTF_8); + mf = mf.replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", ""); + assertHeaderValueInManifestAsString( + mf, name, value, validUTF8ManifestExpected); + } + + static void assertHeaderValueInManifestAsString( + String mf, String name, String value, + boolean validUTF8ManifestExpected) throws IOException { + String header = "\r\n" + name + ": " + value + "\r\n"; + int pos = mf.indexOf(header); + if (validUTF8ManifestExpected) { + assertTrue(pos > 0); + pos = mf.indexOf(header, pos + 1); // unique, no next occurrence + } + assertTrue(pos == -1); + } + + @Test(dataProvider = "lineBreakParameters") + public void readCharactersBrokenAcrossLines(int b, int c, int p, int a, + PositionInManifest i, String character, String value) + throws IOException { + byte[] mfBytes = writeManifestWithBrokenCharacters(i, + FOUR_BYTE_NAME, value); + + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + buf.write(MARK_BEFORE.getBytes(UTF_8)); + byte[] characterBytes = character.getBytes(UTF_8); + // the portion of the character that fits on the current line before + // a break at 72 bytes, ranges from nothing (p == 0) to the whole + // character (p == c) + for (int j = 0; j < p; j++) { + buf.write(characterBytes, j, 1); + } + // expect a line break at exactly 72 bytes from the beginning of the + // line unless the whole character fits on that line + boolean breakExpected = p < c; + if (breakExpected) { + buf.write("\r\n ".getBytes(UTF_8)); + } + // the remaining portion of the character, if any + for (int j = p; j < c; j++) { + buf.write(characterBytes, j, 1); + } + // expect another line break if the whole character fitted on the same + // line and there is another character + if (a == 1) { + if (c == p) { + buf.write("\r\n ".getBytes(UTF_8)); + } + buf.write(MARK_AFTER.getBytes(UTF_8)); + } + // if no other character followed expect a line break immediately + buf.write("\r\n".getBytes(UTF_8)); + byte[] brokenPart = buf.toByteArray(); + try { + assertOccurrence(mfBytes, brokenPart); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); + decodeManifestFromUTF8AndAssertHeaderValue( + mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c); + } catch (AssertionError e) { + System.out.println("-".repeat(72)); + System.out.print(new String(mfBytes, UTF_8)); + System.out.println("-".repeat(72)); + throw e; + } + } + + /** + * From the previous {@link Manifest} implementation reduced to the minimum + * required to demonstrate compatibility. + */ + @SuppressWarnings("deprecation") + static byte[] writeManifestWithBrokenCharacters( + PositionInManifest i, String name, String value) + throws IOException { + byte[] vb = value.getBytes(UTF_8); + value = new String(vb, 0, 0, vb.length); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(out); + dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n"); + + if (i == PositionInManifest.MAIN_ATTRIBUTES) { + StringBuffer buffer = new StringBuffer(name); + buffer.append(": "); + buffer.append(value); + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + } + dos.writeBytes("\r\n"); + + if (i == PositionInManifest.SECTION_NAME || + i == PositionInManifest.NAMED_SECTION) { + StringBuffer buffer = new StringBuffer("Name: "); + if (i == PositionInManifest.SECTION_NAME) { + buffer.append(value); + } else { + buffer.append(FOUR_BYTE_NAME); + } + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + + if (i == PositionInManifest.NAMED_SECTION) { + buffer = new StringBuffer(name); + buffer.append(": "); + buffer.append(value); + make72Safe(buffer); + buffer.append("\r\n"); + dos.writeBytes(buffer.toString()); + } + + dos.writeBytes("\r\n"); + } + + dos.flush(); + return out.toByteArray(); + } + + /** + * Adds line breaks to enforce a maximum 72 bytes per line. + *

+ * From previous Manifest implementation without respect for UTF-8 encoded + * character boundaries breaking also within multi-byte UTF-8 encoded + * characters. + * + * @see {@link Manifest#make72Safe(StringBuffer)} + */ + static void make72Safe(StringBuffer line) { + int length = line.length(); + int index = 72; + while (index < length) { + line.insert(index, "\r\n "); + index += 74; // + line width + line break ("\r\n") + length += 3; // + line break ("\r\n") and space + } + } + + @DataProvider(name = "positionInManifestValues") + public static Object[][] positionInManifestValues() { + LinkedList params = new LinkedList<>(); + for (PositionInManifest i : PositionInManifest.values()) { + params.add(new Object[] {i}); + } + return params.toArray(new Object[][] {{}}); + } + + @Test(dataProvider = "positionInManifestValues") + public void testEmptyValues(PositionInManifest i) throws Exception { + byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, ""); + readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, ""); + } + +} --- /dev/null 2020-02-10 14:32:17.000000000 -0500 +++ new/test/jdk/java/util/jar/Manifest/Println72.java 2020-02-10 14:32:16.000000000 -0500 @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.ByteArrayOutputStream; +import java.util.List; +import java.util.ArrayList; +import java.util.jar.Manifest; +import java.util.jar.Attributes.Name; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.testng.annotations.Test; +import static org.testng.Assert.*; + +/** + * @test + * @compile ../../../../sun/security/tools/jarsigner/Utils.java + * @bug 6443578 6202130 + * @run testng Println72 + * @summary Tests {@link Manifest#println72} line breaking with some particular + * border case kind of test cases. + * For another test covering the complete Unicode character set see + * {@link ValueUtf8Coding}. + */ +public class Println72 { + + static final Name TEST_NAME = new Name("test"); + + static final int NAME_SEP_LENGTH = (TEST_NAME + ": ").length(); + + void test(String originalValue, int... breakPositionsBytes) throws Exception + { + String expectedValueWithBreaksInManifest = originalValue; + // iterating backwards because inserting breaks affects original + // positions + for (int i = breakPositionsBytes.length - 1; i >= 0; i--) { + int breakPositionBytes = breakPositionsBytes[i]; + + int bytesSoFar = 0; + int charsSoFar = 0; + while (bytesSoFar < breakPositionBytes) { + String s = expectedValueWithBreaksInManifest + .substring(charsSoFar, charsSoFar + 1); + charsSoFar++; + bytesSoFar += s.getBytes(UTF_8).length; + if (bytesSoFar > breakPositionBytes) { + fail("break position not aligned with characters"); + } + } + int breakPositionCharacters = charsSoFar; + + expectedValueWithBreaksInManifest = + expectedValueWithBreaksInManifest + .substring(0, breakPositionCharacters) + + "\r\n " + + expectedValueWithBreaksInManifest + .substring(breakPositionCharacters); + } + + Manifest mf = new Manifest(); + mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); + mf.getMainAttributes().put(TEST_NAME, originalValue); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mf.write(out); + byte[] mfBytes = out.toByteArray(); + + Utils.echoManifest(mfBytes, "Println72"); + byte[] actual = mfBytes; + String expected = + "Manifest-Version: 1.0\r\n" + + TEST_NAME + ": " + expectedValueWithBreaksInManifest + + "\r\n\r\n"; + try { + assertEquals(new String(actual, UTF_8), expected); + assertEquals(actual, expected.getBytes(UTF_8)); + } catch (AssertionError e) { + System.out.println("actual = " + byteArrayToIntList(actual)); + System.out.println("expected = " + byteArrayToIntList( + expected.getBytes(UTF_8))); + throw e; + } + } + + static List byteArrayToIntList(byte[] bytes) { + List list = new ArrayList<>(); + for (int i = 0; i < bytes.length; i++) { + list.add((int) bytes[i]); + } + return list; + } + + @Test + public void testEmpty() throws Exception { + test(""); // expect neither a line break nor an exception + } + + static final String COMBINING_DIACRITICAL_MARKS = + IntStream.range(0x300, 0x36F) + .mapToObj(i -> new String(Character.toChars(i))) + .collect(Collectors.joining()); + + static String getCharSeq(int numberOfBytes) { + String seq = (numberOfBytes % 2 == 1 ? "e" : "\u00E6") + + COMBINING_DIACRITICAL_MARKS.substring(0, (numberOfBytes - 1) / 2); + assertEquals(seq.getBytes(UTF_8).length, numberOfBytes); + return seq; + } + + @Test + public void testBreakOnFirstLine() throws Exception { + // Combining sequence starts immediately after name and ": " and fits + // the remaining space in the first line. Expect no break. + test(getCharSeq(66)); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line by one byte. Expect to break on a + // new line because the combining sequence fits on a continuation line + // which does not start with name and ": " and provides enough space. + test(getCharSeq(67), 0); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line but still just fits exactly on a + // continuation line. Expect the value to break onto a new line. + test(getCharSeq(71), 0); + + // Combining sequence starts after name and ": " and exceeds the + // remaining space in the first line and neither fits on a continuation + // line. Expect that the first line to be filled with as many codepoints + // as fit on it and expect a line break onto a continuation line after + // 66 bytes of the first line value. + test(getCharSeq(72), 72 - NAME_SEP_LENGTH); + + // Combining sequence starts after name and ": x" and exceeds the + // remaining space in the first line and neither fits on a continuation + // line. Expect that the first line to be filled with as many codepoints + // as fit on it and expect a line break onto a continuation line already + // after 65 bytes of the first line because the following character is + // a code point represented with two bytes in UTF-8 which should not + // be interrupted with a line break. + test("x" + getCharSeq(72), 72 - NAME_SEP_LENGTH - 1); + } + + @Test + public void testBreakOnContinuationLine() throws Exception { + // fits on next line by skipping one byte free on current line + test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 1) + getCharSeq(71), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71 - 1); + + // fits on current line exactly + test("x".repeat(72 - NAME_SEP_LENGTH + 71) + getCharSeq(71), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71); + + // fits on next line by inserting a line break after a line that + // contains only one character yet + test("x".repeat(72 - NAME_SEP_LENGTH + 71 + 1) + getCharSeq(71), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71, + 72 - NAME_SEP_LENGTH + 71 + 1); + + // does not fit on the next line and the one byte not yet used on the + // current line does not hold the first code point of the combined + // character sequence which is a code point encoded with two bytes in + // UTF-8. + test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 1) + getCharSeq(72), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71 - 1, + 72 - NAME_SEP_LENGTH + 71 - 1 + 71 - 1); + + // would not fit on the next line alone but fits on the remaining two + // bytes available on the current line and the whole subsequent line. + test("x".repeat(72 - NAME_SEP_LENGTH + 71 - 2) + getCharSeq(72), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71); + + // previous character filled the whole previous line completely + // but the combined character sequence with 72 bytes still does not fit + // on a single line. the last code point is a two byte one so that an + // unused byte is left unused on the second last line. + test("x".repeat(72 - NAME_SEP_LENGTH + 71) + getCharSeq(72), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71, + 72 - NAME_SEP_LENGTH + 71 + 71 - 1); + + // previous character left one byte used on the current line and the + // remaining 70 bytes available. the combining sequence can use all of + // these 70 bytes because after 70 bytes a new code point starts + test("x".repeat(72 - NAME_SEP_LENGTH + 71 + 1) + getCharSeq(72), + 72 - NAME_SEP_LENGTH, + 72 - NAME_SEP_LENGTH + 71, + 72 - NAME_SEP_LENGTH + 71 + 71); + } + +}