1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 import static java.nio.charset.StandardCharsets.UTF_8; 25 26 import java.io.ByteArrayInputStream; 27 import java.io.ByteArrayOutputStream; 28 import java.io.DataOutputStream; 29 import java.io.IOException; 30 import java.util.jar.Attributes; 31 import java.util.jar.Manifest; 32 import java.util.jar.Attributes.Name; 33 import java.util.List; 34 import java.util.LinkedList; 35 36 import org.testng.annotations.Test; 37 import org.testng.annotations.DataProvider; 38 import static org.testng.Assert.*; 39 40 /** 41 * @test 42 * @bug 6443578 6202130 43 * @run testng LineBreakCharacter 44 * @summary Tests breaking manifest header values across lines in conjunction 45 * with Unicode characters encoded in UTF-8 with a variable number of bytes 46 * when reading and writing jar manifests results in valid UTF-8. 47 * <p> 48 * The manifest line length limit (72 bytes) may be reached at a position 49 * between multiple bytes of a single UTF-8 encoded character. Although 50 * characters should not be broken across lines according to the specification 51 * the previous Manifest implementation did. 52 * <p> 53 * This test makes sure that no character is broken apart across a line break 54 * when writing manifests and also that manifests are still read correctly 55 * whether or not characters encoded in UTF-8 with more than one byte are 56 * interrupted with and continued after a line break for compatibility when 57 * reading older manifests. 58 */ 59 public class LineBreakCharacter { 60 61 static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72; 62 63 /** 64 * Character string that has one byte size in its UTF-8 encoded form to 65 * yield one byte of position offset. 66 */ 67 static final String FILL1BYTE = "x"; 68 static final String MARK_BEFORE = "y"; 69 static final String MARK_AFTER = "z"; 70 71 /** 72 * Four byte name. 73 * By using header names of four characters length the same values can be 74 * used for testing line breaks in both headers (in main attributes as well 75 * as named sections) as well as section names because a named section name 76 * is represented basically like any other header but follows an empty line 77 * and the key is always "Name". 78 * Relative to the start of the value, this way the same offset to the 79 * character to test breaking can be used in all cases. 80 */ 81 static final String FOUR_BYTE_NAME = "Name"; 82 83 /** 84 * Distinguishes main attributes headers, section names, and headers in 85 * named sections because an implementation might make a difference. 86 */ 87 enum PositionInManifest { 88 /** 89 * @see Attributes#writeMain 90 */ 91 MAIN_ATTRIBUTES, 92 /** 93 * @see Attributes#write 94 */ 95 SECTION_NAME, 96 /** 97 * @see Manifest#write 98 */ 99 NAMED_SECTION; 100 } 101 102 static String numByteUnicodeCharacter(int numBytes) { 103 String string; 104 switch (numBytes) { 105 case 1: string = "i"; break; 106 case 2: string = "\u00EF"; break; // small letter i with diaresis 107 case 3: string = "\uFB00"; break; // small double f ligature 108 case 4: string = Character.toString(0x2070E); break; // ? 109 default: throw new RuntimeException(); 110 } 111 assertEquals(string.getBytes(UTF_8).length, numBytes, 112 "self-test failed: unexpected UTF-8 encoded character length"); 113 return string; 114 } 115 116 /** 117 * Produces test cases with all combinations of circumstances covered in 118 * which a character could possibly be attempted to be broken across a line 119 * break onto a continuation line:<ul> 120 * <li>different sizes of a UTF-8 encoded characters: one, two, three, and 121 * four bytes,</li> 122 * <li>all possible positions of the character to test breaking with 123 * relative respect to the 72-byte line length limit including immediately 124 * before that character and immediately after the character and every 125 * position in between for multi-byte UTF-8 encoded characters,</li> 126 * <li>different number of preceding line breaks in the same value</li> 127 * <li>at the end of the value or followed by another character</li> 128 * <li>in a main attributes header value, section name, or named section 129 * header value (see also {@link #PositionInManifest})</li> 130 * </ul> 131 * The same set of test parameters is used to write and read manifests 132 * once without breaking characters apart 133 * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int, 134 * PositionInManifest, String, String)}) and once with doing so 135 * ({@link #readCharactersBrokenAcrossLines(int, int, int, int, 136 * PositionInManifest, String, String)}). 137 * The latter case covers backwards compatibility and involves writing 138 * manifests like they were written before resolution of bug 6443578. 139 */ 140 @DataProvider(name = "lineBreakParameters") 141 public static Object[][] lineBreakParameters() { 142 LinkedList<Object[]> params = new LinkedList<>(); 143 144 // b: number of line breaks before character under test 145 for (int b = 0; b <= 3; b++) { 146 147 // c: unicode character UTF-8 encoded length in bytes 148 for (int c = 1; c <= 4; c++) { 149 150 // p: potential break position offset in bytes 151 // p == 0 => before character, 152 // p == c => after character, and 153 // 0 < p < c => character potentially broken across line break 154 // within the character 155 for (int p = c; p >= 0; p--) { 156 157 // a: no or one character following the one under test 158 // (a == 0 meaning the character under test is the end of 159 // the value which is followed by a line break in the 160 // resulting manifest without continuation line space which 161 // concludes the value) 162 for (int a = 0; a <= 1; a++) { 163 164 // offset: so many characters (actually bytes here, 165 // filled with one byte characters) are needed to place 166 // the next character (the character under test) into a 167 // position relative to the maximum line width that it 168 // may or may not have to be broken onto the next line 169 int offset = 170 // number of lines; - 1 due to continuation " " 171 b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1) 172 // line length minus "Name: ".length() 173 + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6 174 // position of maximum line width relative to 175 // beginning of encoded character 176 - p; 177 String value = ""; 178 for (int i = 0; i < offset - 1; i++) { 179 value += FILL1BYTE; 180 } 181 // character before the one to test the break 182 value += MARK_BEFORE; 183 String character = numByteUnicodeCharacter(c); 184 value += character; 185 for (int i = 0; i < a; i++) { 186 // character after the one to test the break 187 value += MARK_AFTER; 188 } 189 190 for (PositionInManifest i : 191 PositionInManifest.values()) { 192 193 params.add(new Object[] { 194 b, c, p, a, i, character, value}); 195 } 196 } 197 } 198 } 199 } 200 201 return params.toArray(new Object[][] {{}}); 202 } 203 204 /** 205 * Checks that unicode characters work well with line breaks and 206 * continuation lines in jar manifests without breaking a character across 207 * a line break even when encoded in UTF-8 with more than one byte. 208 * <p> 209 * For each of the cases provided by {@link #lineBreakParameters()} the 210 * break position is verified in the written manifest binary form as well 211 * as verified that it restores to the original values when read again. 212 * <p> 213 * As an additional check, the binary manifests are decoded from UTF-8 214 * into Strings before re-joining continued lines. 215 */ 216 @Test(dataProvider = "lineBreakParameters") 217 public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p, 218 int a, PositionInManifest i, String character, String value) 219 throws IOException { 220 byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value); 221 222 // in order to unambiguously establish the position of "character" in 223 // brokenPart, brokenPart is prepended and appended with what is 224 // expected before and after it... 225 String brokenPart = MARK_BEFORE; 226 227 // expect the whole character on the next line unless it fits 228 // completely on the current line 229 boolean breakExpected = p < c; 230 if (breakExpected) { 231 brokenPart += "\r\n "; 232 } 233 brokenPart += character; 234 // expect a line break before the next character if there is a next 235 // character and the previous not already broken on next line 236 if (a > 0) { 237 if (!breakExpected) { 238 brokenPart += "\r\n "; 239 } 240 brokenPart += MARK_AFTER; 241 } 242 brokenPart = brokenPart + "\r\n"; 243 try { 244 assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8)); 245 readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); 246 decodeManifestFromUTF8AndAssertHeaderValue( 247 mfBytes, FOUR_BYTE_NAME, value, true); 248 } catch (AssertionError e) { 249 System.out.println("-".repeat(72)); 250 System.out.print(new String(mfBytes, UTF_8)); 251 System.out.println("-".repeat(72)); 252 throw e; 253 } 254 } 255 256 static byte[] writeManifest(PositionInManifest i, String name, 257 String value) throws IOException { 258 Manifest mf = new Manifest(); 259 mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0"); 260 Attributes attributes = new Attributes(); 261 262 switch (i) { 263 case MAIN_ATTRIBUTES: 264 mf.getMainAttributes().put(new Name(name), value); 265 break; 266 case SECTION_NAME: 267 mf.getEntries().put(value, attributes); 268 break; 269 case NAMED_SECTION: 270 mf.getEntries().put(FOUR_BYTE_NAME, attributes); 271 attributes.put(new Name(name), value); 272 break; 273 } 274 275 ByteArrayOutputStream out = new ByteArrayOutputStream(); 276 mf.write(out); 277 return out.toByteArray(); 278 } 279 280 /** 281 * Asserts one and only one occurrence of a sequence of bytes {@code part} 282 * representing the character and how it is expected to be broken and its 283 * surrounding bytes in a larger sequence that corresponds to the manifest 284 * in binary form {@code mf}. 285 */ 286 static void assertOccurrence(byte[] mf, byte[] part) { 287 List<Integer> matchPos = new LinkedList<>(); 288 for (int i = 0; i < mf.length; i++) { 289 for (int j = 0; j < part.length && i + j <= mf.length; j++) { 290 if (part[j] == 0) { 291 if (i + j != mf.length) { 292 break; // expected eof not found 293 } 294 } else if (i + j == mf.length) { 295 break; 296 } else if (mf[i + j] != part[j]) { 297 break; 298 } 299 if (j == part.length - 1) { 300 matchPos.add(i); 301 } 302 } 303 } 304 assertEquals(matchPos.size(), 1, "not " 305 + (matchPos.size() < 1 ? "found" : "unique") + ": '" 306 + new String(part, UTF_8) + "'"); 307 } 308 309 static void readManifestAndAssertValue( 310 byte[] mfBytes, PositionInManifest i, String name, String value) 311 throws IOException { 312 Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes)); 313 314 switch (i) { 315 case MAIN_ATTRIBUTES: 316 assertEquals(mf.getMainAttributes().getValue(name), value, 317 "main attributes header value"); 318 break; 319 case SECTION_NAME: 320 Attributes attributes = mf.getAttributes(value); 321 assertNotNull(attributes, "named section not found"); 322 break; 323 case NAMED_SECTION: 324 attributes = mf.getAttributes(FOUR_BYTE_NAME); 325 assertEquals(attributes.getValue(name), value, 326 "named section attributes header value"); 327 break; 328 } 329 } 330 331 /** 332 * Decodes a binary manifest {@code mfBytes} into UTF-8 first, before 333 * joining the continuation lines unlike {@link Manifest} and 334 * {@link Attributes} which join the continuation lines first, before 335 * decoding the joined line from UTF-8 into a {@link String}, indicating 336 * the binary manifest is valid UTF-8. 337 */ 338 static void decodeManifestFromUTF8AndAssertHeaderValue( 339 byte[] mfBytes, String name, String value, 340 boolean validUTF8ManifestExpected) throws IOException { 341 String mf = new String(mfBytes, UTF_8); 342 mf = mf.replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", ""); 343 assertHeaderValueInManifestAsString( 344 mf, name, value, validUTF8ManifestExpected); 345 } 346 347 static void assertHeaderValueInManifestAsString( 348 String mf, String name, String value, 349 boolean validUTF8ManifestExpected) throws IOException { 350 String header = "\r\n" + name + ": " + value + "\r\n"; 351 int pos = mf.indexOf(header); 352 if (validUTF8ManifestExpected) { 353 assertTrue(pos > 0); 354 pos = mf.indexOf(header, pos + 1); // unique, no next occurrence 355 } 356 assertTrue(pos == -1); 357 } 358 359 @Test(dataProvider = "lineBreakParameters") 360 public void readCharactersBrokenAcrossLines(int b, int c, int p, int a, 361 PositionInManifest i, String character, String value) 362 throws IOException { 363 byte[] mfBytes = writeManifestWithBrokenCharacters(i, 364 FOUR_BYTE_NAME, value); 365 366 ByteArrayOutputStream buf = new ByteArrayOutputStream(); 367 buf.write(MARK_BEFORE.getBytes(UTF_8)); 368 byte[] characterBytes = character.getBytes(UTF_8); 369 // the portion of the character that fits on the current line before 370 // a break at 72 bytes, ranges from nothing (p == 0) to the whole 371 // character (p == c) 372 for (int j = 0; j < p; j++) { 373 buf.write(characterBytes, j, 1); 374 } 375 // expect a line break at exactly 72 bytes from the beginning of the 376 // line unless the whole character fits on that line 377 boolean breakExpected = p < c; 378 if (breakExpected) { 379 buf.write("\r\n ".getBytes(UTF_8)); 380 } 381 // the remaining portion of the character, if any 382 for (int j = p; j < c; j++) { 383 buf.write(characterBytes, j, 1); 384 } 385 // expect another line break if the whole character fitted on the same 386 // line and there is another character 387 if (a == 1) { 388 if (c == p) { 389 buf.write("\r\n ".getBytes(UTF_8)); 390 } 391 buf.write(MARK_AFTER.getBytes(UTF_8)); 392 } 393 // if no other character followed expect a line break immediately 394 buf.write("\r\n".getBytes(UTF_8)); 395 byte[] brokenPart = buf.toByteArray(); 396 try { 397 assertOccurrence(mfBytes, brokenPart); 398 readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value); 399 decodeManifestFromUTF8AndAssertHeaderValue( 400 mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c); 401 } catch (AssertionError e) { 402 System.out.println("-".repeat(72)); 403 System.out.print(new String(mfBytes, UTF_8)); 404 System.out.println("-".repeat(72)); 405 throw e; 406 } 407 } 408 409 /** 410 * From the previous {@link Manifest} implementation reduced to the minimum 411 * required to demonstrate compatibility. 412 */ 413 @SuppressWarnings("deprecation") 414 static byte[] writeManifestWithBrokenCharacters( 415 PositionInManifest i, String name, String value) 416 throws IOException { 417 byte[] vb = value.getBytes(UTF_8); 418 value = new String(vb, 0, 0, vb.length); 419 ByteArrayOutputStream out = new ByteArrayOutputStream(); 420 DataOutputStream dos = new DataOutputStream(out); 421 dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n"); 422 423 if (i == PositionInManifest.MAIN_ATTRIBUTES) { 424 StringBuffer buffer = new StringBuffer(name); 425 buffer.append(": "); 426 buffer.append(value); 427 make72Safe(buffer); 428 buffer.append("\r\n"); 429 dos.writeBytes(buffer.toString()); 430 } 431 dos.writeBytes("\r\n"); 432 433 if (i == PositionInManifest.SECTION_NAME || 434 i == PositionInManifest.NAMED_SECTION) { 435 StringBuffer buffer = new StringBuffer("Name: "); 436 if (i == PositionInManifest.SECTION_NAME) { 437 buffer.append(value); 438 } else { 439 buffer.append(FOUR_BYTE_NAME); 440 } 441 make72Safe(buffer); 442 buffer.append("\r\n"); 443 dos.writeBytes(buffer.toString()); 444 445 if (i == PositionInManifest.NAMED_SECTION) { 446 buffer = new StringBuffer(name); 447 buffer.append(": "); 448 buffer.append(value); 449 make72Safe(buffer); 450 buffer.append("\r\n"); 451 dos.writeBytes(buffer.toString()); 452 } 453 454 dos.writeBytes("\r\n"); 455 } 456 457 dos.flush(); 458 return out.toByteArray(); 459 } 460 461 /** 462 * Adds line breaks to enforce a maximum 72 bytes per line. 463 * <p> 464 * From previous Manifest implementation without respect for UTF-8 encoded 465 * character boundaries breaking also within multi-byte UTF-8 encoded 466 * characters. 467 * 468 * @see {@link Manifest#make72Safe(StringBuffer)} 469 */ 470 static void make72Safe(StringBuffer line) { 471 int length = line.length(); 472 int index = 72; 473 while (index < length) { 474 line.insert(index, "\r\n "); 475 index += 74; // + line width + line break ("\r\n") 476 length += 3; // + line break ("\r\n") and space 477 } 478 } 479 480 @DataProvider(name = "positionInManifestValues") 481 public static Object[][] positionInManifestValues() { 482 LinkedList<Object[]> params = new LinkedList<>(); 483 for (PositionInManifest i : PositionInManifest.values()) { 484 params.add(new Object[] {i}); 485 } 486 return params.toArray(new Object[][] {{}}); 487 } 488 489 @Test(dataProvider = "positionInManifestValues") 490 public void testEmptyValues(PositionInManifest i) throws Exception { 491 byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, ""); 492 readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, ""); 493 } 494 495 }