1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 import static java.nio.charset.StandardCharsets.UTF_8; 25 26 import java.io.ByteArrayInputStream; 27 import java.io.ByteArrayOutputStream; 28 import java.io.IOException; 29 import java.util.jar.Attributes; 30 import java.util.jar.Attributes.Name; 31 import java.util.jar.Manifest; 32 import java.util.List; 33 import java.util.ArrayList; 34 35 import org.testng.annotations.Test; 36 import static org.testng.Assert.*; 37 38 /** 39 * @test 40 * @bug 8066619 41 * @run testng ValueUtf8Coding 42 * @summary Tests encoding and decoding manifest header values to and from 43 * UTF-8 with the complete Unicode character set. 44 */ /* 45 * see also "../tools/launcher/UnicodeTest.java" for manifest attributes 46 * parsed during launch 47 */ 48 public class ValueUtf8Coding { 49 50 /** 51 * Maximum number of bytes of UTF-8 encoded characters in one header value. 52 * <p> 53 * There are too many different Unicode code points (more than one million) 54 * to fit all into one manifest value. The specifications state: 55 * <q>Implementations should support 65535-byte (not character) header 56 * values, and 65535 headers per file. They might run out of memory, 57 * but there should not be hard-coded limits below these values.</q> 58 * 59 * @see <a 60 * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files"> 61 * Notes on Manifest and Signature Files</a> 62 */ 63 static final int SUPPORTED_VALUE_LENGTH = 65535; 64 65 /** 66 * Returns {@code true} if {@code codePoint} is known not to be a supported 67 * character in manifest header values. Explicitly forbidden in manifest 68 * header values are according to a statement from the specifications: 69 * <q>otherchar: any UTF-8 character except NUL, CR and LF</q>. 70 * {@code NUL} ({@code 0x0}), however, works just fine and might have been 71 * used and might still be. 72 * 73 * @see <a href="{@docRoot}/../specs/jar/jar.html#Section-Specification"> 74 * Jar File Specification</a> 75 */ 76 static boolean isUnsupportedManifestValueCharacter(int codePoint) { 77 return codePoint == '\r' /* CR */ || codePoint == '\n' /* LF */; 78 }; 79 80 /** 81 * Produces a list of strings with all Unicode characters except those 82 * explicitly invalid in manifest header values. 83 * Each string is filled with as many characters as fit into 84 * {@link #SUPPORTED_VALUE_LENGTH} bytes with UTF-8 encoding except the 85 * last string which contains the remaining characters. Each of those 86 * strings becomes a header value the number of which 65535 should be 87 * supported per file. 88 * 89 * @see <a 90 * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files"> 91 * Notes on Manifest and Signature Files</a> 92 */ 93 static List<String> produceValuesWithAllUnicodeCharacters() { 94 ArrayList<String> values = new ArrayList<>(); 95 byte[] valueBuf = new byte[SUPPORTED_VALUE_LENGTH]; 96 int pos = 0; 97 for (int codePoint = Character.MIN_CODE_POINT; 98 codePoint <= Character.MAX_CODE_POINT; codePoint++) { 99 if (isUnsupportedManifestValueCharacter(codePoint)) { 100 continue; 101 } 102 103 byte[] charBuf = Character.toString(codePoint).getBytes(UTF_8); 104 if (pos + charBuf.length > valueBuf.length) { 105 values.add(new String(valueBuf, 0, pos, UTF_8)); 106 pos = 0; 107 } 108 System.arraycopy(charBuf, 0, valueBuf, pos, charBuf.length); 109 pos += charBuf.length; 110 } 111 if (pos > 0) { 112 values.add(new String(valueBuf, 0, pos, UTF_8)); 113 } 114 // minimum number of headers supported is the same as the minimum size 115 // of each header value in bytes 116 assertTrue(values.size() <= SUPPORTED_VALUE_LENGTH); 117 return values; 118 } 119 120 /** 121 * Returns simple, valid, short, and distinct manifest header names. 122 * The returned name cannot collide with "{@code Manifest-Version}" because 123 * the returned string does not contain "{@code -}". 124 */ 125 static Name azName(int seed) { 126 StringBuffer name = new StringBuffer(); 127 do { 128 name.insert(0, (char) (seed % 26 + (seed < 26 ? 'A' : 'a'))); 129 seed = seed / 26 - 1; 130 } while (seed >= 0); 131 return new Name(name.toString()); 132 } 133 134 /** 135 * Writes and reads a manifest with the complete Unicode character set. 136 * The characters are grouped into manifest header values with about as 137 * many bytes as allowed each, utilizing a single big manifest. 138 * <p> 139 * This test assumes that a manifest is encoded and decoded correctly if 140 * writing and then reading it again results in a manifest with identical 141 * values as the original. The test is not about other aspects of writing 142 * and reading manifests than only that, given the fact and the way it 143 * works for some characters such as the most widely and often used ones, 144 * it also works for the complete Unicode character set just the same. 145 * <p> 146 * Only header values are tested. The set of allowed characters for header 147 * names are much more limited and are a different topic entirely and most 148 * simple ones are used here as necessary just to get valid and different 149 * ones (see {@link #azName}). 150 * <p> 151 * Because the current implementation under test uses different portions 152 * of code depending on where the value occurs to read or write, each 153 * character is tested in each of the three positions:<ul> 154 * <li>main attribute header,</li> 155 * <li>named section name, and</li> 156 * <li>named sections header values</li> 157 * </ul> 158 * Implementation of writing the main section headers in 159 * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the 160 * one writing named section headers in 161 * {@link Attributes#write(java.io.DataOutputStream)} regarding the special 162 * order of {@link Name#MANIFEST_VERSION} and 163 * {@link Name#SIGNATURE_VERSION} and also 164 * {@link Manifest#read(java.io.InputStream)} at least potentially reads 165 * main sections differently than reading named sections names headers in 166 * {@link Attributes#read(Manifest.FastInputStream, byte[])}. 167 */ 168 @Test 169 public void testCompleteUnicodeCharacterSet() throws IOException { 170 Manifest mf = new Manifest(); 171 mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); 172 173 List<String> values = produceValuesWithAllUnicodeCharacters(); 174 for (int i = 0; i < values.size(); i++) { 175 Name name = azName(i); 176 String value = values.get(i); 177 178 mf.getMainAttributes().put(name, value); 179 Attributes attributes = new Attributes(); 180 mf.getEntries().put(value, attributes); 181 attributes.put(name, value); 182 } 183 184 mf = writeAndRead(mf); 185 186 for (int i = 0; i < values.size(); i++) { 187 String value = values.get(i); 188 Name name = azName(i); 189 190 assertEquals(mf.getMainAttributes().getValue(name), value, 191 "main attributes header value"); 192 Attributes attributes = mf.getAttributes(value); 193 assertNotNull(attributes, "named section"); 194 assertEquals(attributes.getValue(name), value, 195 "named section attributes value"); 196 } 197 } 198 199 static Manifest writeAndRead(Manifest mf) throws IOException { 200 ByteArrayOutputStream out = new ByteArrayOutputStream(); 201 mf.write(out); 202 byte[] mfBytes = out.toByteArray(); 203 204 System.out.println("-".repeat(72)); 205 System.out.print(new String(mfBytes, UTF_8)); 206 System.out.println("-".repeat(72)); 207 208 ByteArrayInputStream in = new ByteArrayInputStream(mfBytes); 209 return new Manifest(in); 210 } 211 212 }