1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 import static java.nio.charset.StandardCharsets.UTF_8;
  25 
  26 import java.io.ByteArrayInputStream;
  27 import java.io.ByteArrayOutputStream;
  28 import java.io.IOException;
  29 import java.util.jar.Attributes;
  30 import java.util.jar.Attributes.Name;
  31 import java.util.jar.Manifest;
  32 import java.util.List;
  33 import java.util.ArrayList;
  34 
  35 import org.testng.annotations.Test;
  36 import static org.testng.Assert.*;
  37 
  38 /**
  39  * @test
  40  * @bug 8066619
  41  * @run testng ValueUtf8Coding
  42  * @summary Tests encoding and decoding manifest header values to and from
  43  * UTF-8 with the complete Unicode character set.
  44  */ /*
  45  * see also "../tools/launcher/UnicodeTest.java" for manifest attributes
  46  * parsed during launch
  47  */
  48 public class ValueUtf8Coding {
  49 
  50     /**
  51      * Maximum number of bytes of UTF-8 encoded characters in one header value.
  52      * <p>
  53      * There are too many different Unicode code points (more than one million)
  54      * to fit all into one manifest value. The specifications state:
  55      * <q>Implementations should support 65535-byte (not character) header
  56      * values, and 65535 headers per file. They might run out of memory,
  57      * but there should not be hard-coded limits below these values.</q>
  58      *
  59      * @see <a
  60      * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
  61      * Notes on Manifest and Signature Files</a>
  62      */
  63     static final int SUPPORTED_VALUE_LENGTH = 65535;
  64 
  65     /**
  66      * Returns {@code true} if {@code codePoint} is known not to be a supported
  67      * character in manifest header values. Explicitly forbidden in manifest
  68      * header values are according to a statement from the specifications:
  69      * <q>otherchar: any UTF-8 character except NUL, CR and LF</q>.
  70      * {@code NUL} ({@code 0x0}), however, works just fine and might have been
  71      * used and might still be.
  72      *
  73      * @see <a href="{@docRoot}/../specs/jar/jar.html#Section-Specification">
  74      * Jar File Specification</a>
  75      */
  76     static boolean isUnsupportedManifestValueCharacter(int codePoint) {
  77         return codePoint == '\r' /* CR */ || codePoint == '\n' /* LF */;
  78     };
  79 
  80     /**
  81      * Produces a list of strings with all Unicode characters except those
  82      * explicitly invalid in manifest header values.
  83      * Each string is filled with as many characters as fit into
  84      * {@link #SUPPORTED_VALUE_LENGTH} bytes with UTF-8 encoding except the
  85      * last string which contains the remaining characters. Each of those
  86      * strings becomes a header value the number of which 65535 should be
  87      * supported per file.
  88      *
  89      * @see <a
  90      * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
  91      * Notes on Manifest and Signature Files</a>
  92      */
  93     static List<String> produceValuesWithAllUnicodeCharacters() {
  94         ArrayList<String> values = new ArrayList<>();
  95         byte[] valueBuf = new byte[SUPPORTED_VALUE_LENGTH];
  96         int pos = 0;
  97         for (int codePoint = Character.MIN_CODE_POINT;
  98                 codePoint <= Character.MAX_CODE_POINT; codePoint++) {
  99             if (isUnsupportedManifestValueCharacter(codePoint)) {
 100                 continue;
 101             }
 102 
 103             byte[] charBuf = Character.toString(codePoint).getBytes(UTF_8);
 104             if (pos + charBuf.length > valueBuf.length) {
 105                 values.add(new String(valueBuf, 0, pos, UTF_8));
 106                 pos = 0;
 107             }
 108             System.arraycopy(charBuf, 0, valueBuf, pos, charBuf.length);
 109             pos += charBuf.length;
 110         }
 111         if (pos > 0) {
 112             values.add(new String(valueBuf, 0, pos, UTF_8));
 113         }
 114         // minimum number of headers supported is the same as the minimum size
 115         // of each header value in bytes
 116         assertTrue(values.size() <= SUPPORTED_VALUE_LENGTH);
 117         return values;
 118     }
 119 
 120     /**
 121      * Returns simple, valid, short, and distinct manifest header names.
 122      * The returned name cannot collide with "{@code Manifest-Version}" because
 123      * the returned string does not contain "{@code -}".
 124      */
 125     static Name azName(int seed) {
 126         StringBuffer name = new StringBuffer();
 127         do {
 128             name.insert(0, (char) (seed % 26 + (seed < 26 ? 'A' : 'a')));
 129             seed = seed / 26 - 1;
 130         } while (seed >= 0);
 131         return new Name(name.toString());
 132     }
 133 
 134     /**
 135      * Writes and reads a manifest with the complete Unicode character set.
 136      * The characters are grouped into manifest header values with about as
 137      * many bytes as allowed each, utilizing a single big manifest.
 138      * <p>
 139      * This test assumes that a manifest is encoded and decoded correctly if
 140      * writing and then reading it again results in a manifest with identical
 141      * values as the original. The test is not about other aspects of writing
 142      * and reading manifests than only that, given the fact and the way it
 143      * works for some characters such as the most widely and often used ones,
 144      * it also works for the complete Unicode character set just the same.
 145      * <p>
 146      * Only header values are tested. The set of allowed characters for header
 147      * names are much more limited and are a different topic entirely and most
 148      * simple ones are used here as necessary just to get valid and different
 149      * ones (see {@link #azName}).
 150      * <p>
 151      * Because the current implementation under test uses different portions
 152      * of code depending on where the value occurs to read or write, each
 153      * character is tested in each of the three positions:<ul>
 154      * <li>main attribute header,</li>
 155      * <li>named section name, and</li>
 156      * <li>named sections header values</li>
 157      * </ul>
 158      * Implementation of writing the main section headers in
 159      * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the
 160      * one writing named section headers in
 161      * {@link Attributes#write(java.io.DataOutputStream)} regarding the special
 162      * order of {@link Name#MANIFEST_VERSION} and
 163      * {@link Name#SIGNATURE_VERSION} and also
 164      * {@link Manifest#read(java.io.InputStream)} at least potentially reads
 165      * main sections differently than reading named sections names headers in
 166      * {@link Attributes#read(Manifest.FastInputStream, byte[])}.
 167      */
 168     @Test
 169     public void testCompleteUnicodeCharacterSet() throws IOException {
 170         Manifest mf = new Manifest();
 171         mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
 172 
 173         List<String> values = produceValuesWithAllUnicodeCharacters();
 174         for (int i = 0; i < values.size(); i++) {
 175             Name name = azName(i);
 176             String value = values.get(i);
 177 
 178             mf.getMainAttributes().put(name, value);
 179             Attributes attributes = new Attributes();
 180             mf.getEntries().put(value, attributes);
 181             attributes.put(name, value);
 182         }
 183 
 184         mf = writeAndRead(mf);
 185 
 186         for (int i = 0; i < values.size(); i++) {
 187             String value = values.get(i);
 188             Name name = azName(i);
 189 
 190             assertEquals(mf.getMainAttributes().getValue(name), value,
 191                     "main attributes header value");
 192             Attributes attributes = mf.getAttributes(value);
 193             assertNotNull(attributes, "named section");
 194             assertEquals(attributes.getValue(name), value,
 195                     "named section attributes value");
 196         }
 197     }
 198 
 199     static Manifest writeAndRead(Manifest mf) throws IOException {
 200         ByteArrayOutputStream out = new ByteArrayOutputStream();
 201         mf.write(out);
 202         byte[] mfBytes = out.toByteArray();
 203 
 204         System.out.println("-".repeat(72));
 205         System.out.print(new String(mfBytes, UTF_8));
 206         System.out.println("-".repeat(72));
 207 
 208         ByteArrayInputStream in = new ByteArrayInputStream(mfBytes);
 209         return new Manifest(in);
 210     }
 211 
 212 }