1 /*
   2  * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 import static java.nio.charset.StandardCharsets.UTF_8;
  25 
  26 import java.io.ByteArrayInputStream;
  27 import java.io.ByteArrayOutputStream;
  28 import java.io.DataOutputStream;
  29 import java.io.IOException;
  30 import java.util.jar.Attributes;
  31 import java.util.jar.Manifest;
  32 import java.util.jar.Attributes.Name;
  33 import java.util.List;
  34 import java.util.LinkedList;
  35 
  36 import org.testng.annotations.Test;
  37 import org.testng.annotations.DataProvider;
  38 import static org.testng.Assert.*;
  39 
  40 /**
  41  * @test
  42  * @bug 6443578 6202130
  43  * @run testng LineBreakCharacter
  44  * @summary Tests breaking manifest header values across lines in conjunction
  45  * with Unicode characters encoded in UTF-8 with a variable number of bytes
  46  * when reading and writing jar manifests results in valid UTF-8.
  47  * <p>
 * The manifest line length limit (72 bytes) may be reached at a position
 * between multiple bytes of a single UTF-8 encoded character. Although,
 * according to the specification, characters should not be broken across
 * lines, the previous Manifest implementation did break them.
  52  * <p>
  53  * This test makes sure that no character is broken apart across a line break
  54  * when writing manifests and also that manifests are still read correctly
  55  * whether or not characters encoded in UTF-8 with more than one byte are
  56  * interrupted with and continued after a line break for compatibility when
  57  * reading older manifests.
  58  */
  59 public class LineBreakCharacter {
  60 
    /**
     * Number of bytes of content on one manifest line, not counting the line
     * break, after which a value is continued on the next line.
     */
    static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72;

    /**
     * Character string that has one byte size in its UTF-8 encoded form to
     * yield one byte of position offset.
     */
    static final String FILL1BYTE = "x";

    /**
     * One-byte marker placed in the test values immediately before the
     * character under test so that its position can be found unambiguously.
     */
    static final String MARK_BEFORE = "y";

    /**
     * One-byte marker optionally placed in the test values immediately after
     * the character under test.
     */
    static final String MARK_AFTER = "z";

    /**
     * Four byte name.
     * A named section name is represented basically like any other header
     * but follows an empty line and its key is always "Name". By using a
     * header name of the same four characters length, the same values can be
     * used for testing line breaks in header values (in main attributes and
     * in named sections) and in section names.
     * Relative to the start of the value, this way the same offset to the
     * character to test breaking can be used in all cases.
     */
    static final String FOUR_BYTE_NAME = "Name";
  82 
  83     /**
  84      * Distinguishes main attributes headers, section names, and headers in
  85      * named sections because an implementation might make a difference.
  86      */
  87     enum PositionInManifest {
  88         /**
  89          * @see Attributes#writeMain
  90          */
  91         MAIN_ATTRIBUTES,
  92         /**
  93          * @see Attributes#write
  94          */
  95         SECTION_NAME,
  96         /**
  97          * @see Manifest#write
  98          */
  99         NAMED_SECTION;
 100     }
 101 
 102     static String numByteUnicodeCharacter(int numBytes) {
 103         String string;
 104         switch (numBytes) {
 105             case 1: string = "i"; break;
 106             case 2: string = "\u00EF"; break; // small letter i with diaresis
 107             case 3: string = "\uFB00"; break; // small double f ligature
 108             case 4: string = Character.toString(0x2070E); break; // ?
 109             default: throw new RuntimeException();
 110         }
 111         assertEquals(string.getBytes(UTF_8).length, numBytes,
 112                 "self-test failed: unexpected UTF-8 encoded character length");
 113         return string;
 114     }
 115 
 116     /**
 117      * Produces test cases with all combinations of circumstances covered in
 118      * which a character could possibly be attempted to be broken across a line
 119      * break onto a continuation line:<ul>
 120      * <li>different sizes of a UTF-8 encoded characters: one, two, three, and
 121      * four bytes,</li>
 122      * <li>all possible positions of the character to test breaking with
 123      * relative respect to the 72-byte line length limit including immediately
 124      * before that character and immediately after the character and every
 125      * position in between for multi-byte UTF-8 encoded characters,</li>
 126      * <li>different number of preceding line breaks in the same value</li>
 127      * <li>at the end of the value or followed by another character</li>
 128      * <li>in a main attributes header value, section name, or named section
 129      * header value (see also {@link #PositionInManifest})</li>
 130      * </ul>
 131      * The same set of test parameters is used to write and read manifests
 132      * once without breaking characters apart
 133      * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int,
 134      * PositionInManifest, String, String)}) and once with doing so
 135      * ({@link #readCharactersBrokenAcrossLines(int, int, int, int,
 136      * PositionInManifest, String, String)}).
 137      * The latter case covers backwards compatibility and involves writing
 138      * manifests like they were written before resolution of bug 6443578.
 139      */
 140     @DataProvider(name = "lineBreakParameters")
 141     public static Object[][] lineBreakParameters() {
 142         LinkedList<Object[]> params = new LinkedList<>();
 143 
 144         // b: number of line breaks before character under test
 145         for (int b = 0; b <= 3; b++) {
 146 
 147            // c: unicode character UTF-8 encoded length in bytes
 148            for (int c = 1; c <= 4; c++) {
 149 
 150                 // p: potential break position offset in bytes
 151                 // p == 0 => before character,
 152                 // p == c => after character, and
 153                 // 0 < p < c => character potentially broken across line break
 154                 //              within the character
 155                 for (int p = c; p >= 0; p--) {
 156 
 157                     // a: no or one character following the one under test
 158                     // (a == 0 meaning the character under test is the end of
 159                     // the value which is followed by a line break in the
 160                     // resulting manifest without continuation line space which
 161                     // concludes the value)
 162                     for (int a = 0; a <= 1; a++) {
 163 
 164                         // offset: so many characters (actually bytes here,
 165                         // filled with one byte characters) are needed to place
 166                         // the next character (the character under test) into a
 167                         // position relative to the maximum line width that it
 168                         // may or may not have to be broken onto the next line
 169                         int offset =
 170                                 // number of lines; - 1 due to continuation " "
 171                                 b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1)
 172                                 // line length minus "Name: ".length()
 173                                 + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6
 174                                 // position of maximum line width relative to
 175                                 // beginning of encoded character
 176                                 - p;
 177                         String value = "";
 178                         for (int i = 0; i < offset - 1; i++) {
 179                             value += FILL1BYTE;
 180                         }
 181                         // character before the one to test the break
 182                         value += MARK_BEFORE;
 183                         String character = numByteUnicodeCharacter(c);
 184                         value += character;
 185                         for (int i = 0; i < a; i++) {
 186                             // character after the one to test the break
 187                             value += MARK_AFTER;
 188                         }
 189 
 190                         for (PositionInManifest i :
 191                                 PositionInManifest.values()) {
 192 
 193                             params.add(new Object[] {
 194                                     b, c, p, a, i, character, value});
 195                         }
 196                     }
 197                 }
 198             }
 199         }
 200 
 201         return params.toArray(new Object[][] {{}});
 202     }
 203 
 204     /**
 205      * Checks that unicode characters work well with line breaks and
 206      * continuation lines in jar manifests without breaking a character across
 207      * a line break even when encoded in UTF-8 with more than one byte.
 208      * <p>
 209      * For each of the cases provided by {@link #lineBreakParameters()} the
 210      * break position is verified in the written manifest binary form as well
 211      * as verified that it restores to the original values when read again.
 212      * <p>
 213      * As an additional check, the binary manifests are decoded from UTF-8
 214      * into Strings before re-joining continued lines.
 215      */
 216     @Test(dataProvider = "lineBreakParameters")
 217     public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p,
 218             int a, PositionInManifest i, String character, String value)
 219                     throws IOException {
 220         byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value);
 221 
 222         // in order to unambiguously establish the position of "character" in
 223         // brokenPart, brokenPart is prepended and appended with what is
 224         // expected before and after it...
 225         String brokenPart = MARK_BEFORE;
 226 
 227         // expect the whole character on the next line unless it fits
 228         // completely on the current line
 229         boolean breakExpected = p < c;
 230         if (breakExpected) {
 231             brokenPart += "\r\n ";
 232         }
 233         brokenPart += character;
 234         // expect a line break before the next character if there is a next
 235         // character and the previous not already broken on next line
 236         if (a > 0) {
 237             if (!breakExpected) {
 238                 brokenPart += "\r\n ";
 239             }
 240             brokenPart += MARK_AFTER;
 241         }
 242         brokenPart = brokenPart + "\r\n";
 243         try {
 244             assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8));
 245             readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
 246             decodeManifestFromUTF8AndAssertHeaderValue(
 247                     mfBytes, FOUR_BYTE_NAME, value, true);
 248         } catch (AssertionError e) {
 249             System.out.println("-".repeat(72));
 250             System.out.print(new String(mfBytes, UTF_8));
 251             System.out.println("-".repeat(72));
 252             throw e;
 253         }
 254     }
 255 
 256     static byte[] writeManifest(PositionInManifest i, String name,
 257             String value) throws IOException {
 258         Manifest mf = new Manifest();
 259         mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
 260         Attributes attributes = new Attributes();
 261 
 262         switch (i) {
 263         case MAIN_ATTRIBUTES:
 264             mf.getMainAttributes().put(new Name(name), value);
 265             break;
 266         case SECTION_NAME:
 267             mf.getEntries().put(value, attributes);
 268             break;
 269         case NAMED_SECTION:
 270             mf.getEntries().put(FOUR_BYTE_NAME, attributes);
 271             attributes.put(new Name(name), value);
 272             break;
 273         }
 274 
 275         ByteArrayOutputStream out = new ByteArrayOutputStream();
 276         mf.write(out);
 277         return out.toByteArray();
 278     }
 279 
 280     /**
 281      * Asserts one and only one occurrence of a sequence of bytes {@code part}
 282      * representing the character and how it is expected to be broken and its
 283      * surrounding bytes in a larger sequence that corresponds to the manifest
 284      * in binary form {@code mf}.
 285      */
 286     static void assertOccurrence(byte[] mf, byte[] part) {
 287         List<Integer> matchPos = new LinkedList<>();
 288         for (int i = 0; i < mf.length; i++) {
 289             for (int j = 0; j < part.length && i + j <= mf.length; j++) {
 290                 if (part[j] == 0) {
 291                     if (i + j != mf.length) {
 292                         break; // expected eof not found
 293                     }
 294                 } else if (i + j == mf.length) {
 295                     break;
 296                 } else if (mf[i + j] != part[j]) {
 297                     break;
 298                 }
 299                 if (j == part.length - 1) {
 300                     matchPos.add(i);
 301                 }
 302             }
 303         }
 304         assertEquals(matchPos.size(), 1, "not "
 305                 + (matchPos.size() < 1 ? "found" : "unique") + ": '"
 306                 + new String(part, UTF_8) + "'");
 307     }
 308 
 309     static void readManifestAndAssertValue(
 310             byte[] mfBytes, PositionInManifest i, String name, String value)
 311                     throws IOException {
 312         Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes));
 313 
 314         switch (i) {
 315         case MAIN_ATTRIBUTES:
 316             assertEquals(mf.getMainAttributes().getValue(name), value,
 317                     "main attributes header value");
 318             break;
 319         case SECTION_NAME:
 320             Attributes attributes = mf.getAttributes(value);
 321             assertNotNull(attributes, "named section not found");
 322             break;
 323         case NAMED_SECTION:
 324             attributes = mf.getAttributes(FOUR_BYTE_NAME);
 325             assertEquals(attributes.getValue(name), value,
 326                     "named section attributes header value");
 327             break;
 328         }
 329     }
 330 
 331     /**
 332      * Decodes a binary manifest {@code mfBytes} into UTF-8 first, before
 333      * joining the continuation lines unlike {@link Manifest} and
 334      * {@link Attributes} which join the continuation lines first, before
 335      * decoding the joined line from UTF-8 into a {@link String}, indicating
 336      * the binary manifest is valid UTF-8.
 337      */
 338     static void decodeManifestFromUTF8AndAssertHeaderValue(
 339             byte[] mfBytes, String name, String value,
 340             boolean validUTF8ManifestExpected) throws IOException {
 341         String mf = new String(mfBytes, UTF_8);
 342         mf = mf.replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", "");
 343         assertHeaderValueInManifestAsString(
 344                 mf, name, value, validUTF8ManifestExpected);
 345     }
 346 
 347     static void assertHeaderValueInManifestAsString(
 348             String mf, String name, String value,
 349             boolean validUTF8ManifestExpected) throws IOException {
 350         String header = "\r\n" + name + ": " + value + "\r\n";
 351         int pos = mf.indexOf(header);
 352         if (validUTF8ManifestExpected) {
 353             assertTrue(pos > 0);
 354             pos = mf.indexOf(header, pos + 1); // unique, no next occurrence
 355         }
 356         assertTrue(pos == -1);
 357     }
 358 
 359     @Test(dataProvider = "lineBreakParameters")
 360     public void readCharactersBrokenAcrossLines(int b, int c, int p, int a,
 361             PositionInManifest i, String character, String value)
 362                     throws IOException {
 363         byte[] mfBytes = writeManifestWithBrokenCharacters(i,
 364                 FOUR_BYTE_NAME, value);
 365 
 366         ByteArrayOutputStream buf = new ByteArrayOutputStream();
 367         buf.write(MARK_BEFORE.getBytes(UTF_8));
 368         byte[] characterBytes = character.getBytes(UTF_8);
 369         // the portion of the character that fits on the current line before
 370         // a break at 72 bytes, ranges from nothing (p == 0) to the whole
 371         // character (p == c)
 372         for (int j = 0; j < p; j++) {
 373             buf.write(characterBytes, j, 1);
 374         }
 375         // expect a line break at exactly 72 bytes from the beginning of the
 376         // line unless the whole character fits on that line
 377         boolean breakExpected = p < c;
 378         if (breakExpected) {
 379             buf.write("\r\n ".getBytes(UTF_8));
 380         }
 381         // the remaining portion of the character, if any
 382         for (int j = p; j < c; j++) {
 383             buf.write(characterBytes, j, 1);
 384         }
 385         // expect another line break if the whole character fitted on the same
 386         // line and there is another character
 387         if (a == 1) {
 388             if (c == p) {
 389                 buf.write("\r\n ".getBytes(UTF_8));
 390             }
 391             buf.write(MARK_AFTER.getBytes(UTF_8));
 392         }
 393         // if no other character followed expect a line break immediately
 394         buf.write("\r\n".getBytes(UTF_8));
 395         byte[] brokenPart = buf.toByteArray();
 396         try {
 397             assertOccurrence(mfBytes, brokenPart);
 398             readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
 399             decodeManifestFromUTF8AndAssertHeaderValue(
 400                     mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c);
 401         } catch (AssertionError e) {
 402             System.out.println("-".repeat(72));
 403             System.out.print(new String(mfBytes, UTF_8));
 404             System.out.println("-".repeat(72));
 405             throw e;
 406         }
 407     }
 408 
 409     /**
 410      * From the previous {@link Manifest} implementation reduced to the minimum
 411      * required to demonstrate compatibility.
 412      */
 413     @SuppressWarnings("deprecation")
 414     static byte[] writeManifestWithBrokenCharacters(
 415             PositionInManifest i, String name, String value)
 416                     throws IOException {
 417         byte[] vb = value.getBytes(UTF_8);
 418         value = new String(vb, 0, 0, vb.length);
 419         ByteArrayOutputStream out = new ByteArrayOutputStream();
 420         DataOutputStream dos = new DataOutputStream(out);
 421         dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n");
 422 
 423         if (i == PositionInManifest.MAIN_ATTRIBUTES) {
 424             StringBuffer buffer = new StringBuffer(name);
 425             buffer.append(": ");
 426             buffer.append(value);
 427             make72Safe(buffer);
 428             buffer.append("\r\n");
 429             dos.writeBytes(buffer.toString());
 430         }
 431         dos.writeBytes("\r\n");
 432 
 433         if (i == PositionInManifest.SECTION_NAME ||
 434                 i == PositionInManifest.NAMED_SECTION) {
 435             StringBuffer buffer = new StringBuffer("Name: ");
 436             if (i == PositionInManifest.SECTION_NAME) {
 437                 buffer.append(value);
 438             } else {
 439                 buffer.append(FOUR_BYTE_NAME);
 440             }
 441             make72Safe(buffer);
 442             buffer.append("\r\n");
 443             dos.writeBytes(buffer.toString());
 444 
 445             if (i == PositionInManifest.NAMED_SECTION) {
 446                 buffer = new StringBuffer(name);
 447                 buffer.append(": ");
 448                 buffer.append(value);
 449                 make72Safe(buffer);
 450                 buffer.append("\r\n");
 451                 dos.writeBytes(buffer.toString());
 452             }
 453 
 454             dos.writeBytes("\r\n");
 455         }
 456 
 457         dos.flush();
 458         return out.toByteArray();
 459     }
 460 
 461     /**
 462      * Adds line breaks to enforce a maximum 72 bytes per line.
 463      * <p>
 464      * From previous Manifest implementation without respect for UTF-8 encoded
 465      * character boundaries breaking also within multi-byte UTF-8 encoded
 466      * characters.
 467      *
 468      * @see {@link Manifest#make72Safe(StringBuffer)}
 469      */
 470     static void make72Safe(StringBuffer line) {
 471         int length = line.length();
 472         int index = 72;
 473         while (index < length) {
 474             line.insert(index, "\r\n ");
 475             index += 74; // + line width + line break ("\r\n")
 476             length += 3; // + line break ("\r\n") and space
 477         }
 478     }
 479 
 480     @DataProvider(name = "positionInManifestValues")
 481     public static Object[][] positionInManifestValues() {
 482         LinkedList<Object[]> params = new LinkedList<>();
 483         for (PositionInManifest i : PositionInManifest.values()) {
 484             params.add(new Object[] {i});
 485         }
 486         return params.toArray(new Object[][] {{}});
 487     }
 488 
 489     @Test(dataProvider = "positionInManifestValues")
 490     public void testEmptyValues(PositionInManifest i) throws Exception {
 491         byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, "");
 492         readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, "");
 493     }
 494 
 495 }