jdk Udiff src/java.base/share/classes/java/util/jar/Manifest.java

src/java.base/share/classes/java/util/jar/Manifest.java

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -28,10 +28,11 @@
 import java.io.DataOutputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.text.BreakIterator;
 import java.util.HashMap;
 import java.util.Map;
 
 import sun.nio.cs.UTF_8;
 import sun.security.util.SecurityProperties;

@@ -228,29 +229,78 @@
             length += 3; // + line break ("\r\n") and space
         }
     }
 
     /**
+     * Returns {@code true} if {@code b} is a UTF-8 continuation byte, that
+     * is, a byte of the form {@code 10xxxxxx} that is not the first (or only)
+     * byte of an encoded character, and {@code false} otherwise.
+     *
+     * @see <a href="https://tools.ietf.org/html/rfc3629#section-3">
+     * RFC 3629 - UTF-8, a transformation format of ISO 10646</a>
+     * @see StringCoding#isNotContinuation(int)
+     * @see sun.nio.cs.UTF_8.Decoder#isNotContinuation(int)
+     */
+    private static boolean isContinuation(byte b) {
+        return (b & 0xc0) == 0x80; // top two bits are 10
+    }
+
+    /**
      * Writes {@code line} to {@code out} with line breaks and continuation
-     * spaces within the limits of 72 bytes of contents per line followed
-     * by a line break.
+     * spaces so that no line holds more than 72 bytes of content, followed by
+     * a final line break. The UTF-8 byte sequence of a character is kept
+     * together on one line where possible, also if the character is encoded
+     * with more than one byte or is a sequence of a base character and
+     * combining diacritical marks.
+     * <p>
+     * A base character and its combining diacritical marks may be separated
+     * from each other by a continuation line break ("{@code \r\n }") only if
+     * the whole sequence of the base character and all the combining
+     * diacritical marks belonging to it exceeds 71 bytes when encoded in
+     * UTF-8. Even then, no break is placed inside the bytes of one code point.
+     * The limit is 71 bytes rather than 72 because every continuation line
+     * starts with a space that occupies the first of the 72 bytes a line can
+     * hold. The first line provides even less space for the value because it
+     * starts with the name.
      */
     static void println72(OutputStream out, String line) throws IOException {
-        if (!line.isEmpty()) {
-            byte[] lineBytes = line.getBytes(UTF_8.INSTANCE);
-            int length = lineBytes.length;
-            // first line can hold one byte more than subsequent lines which
-            // start with a continuation line break space
-            out.write(lineBytes[0]);
-            int pos = 1;
-            while (length - pos > 71) {
-                out.write(lineBytes, pos, 71);
-                pos += 71;
+        int linePos = 0; // number of bytes already put out on current line
+        BreakIterator boundary = BreakIterator.getCharacterInstance();
+        boundary.setText(line);
+        int start = boundary.first(), end;
+        while ((end = boundary.next()) != BreakIterator.DONE) {
+            String character = line.substring(start, end);
+            start = end;
+            byte[] characterBytes = character.getBytes(UTF_8.INSTANCE);
+            int characterLength = characterBytes.length;
+            // Break onto a new continuation line before this character if
+            // it does not fit on the current line anymore but fits on a
+            // whole new line. Otherwise, if the character does not fit on
+            // one whole line alone, fill up the current line first and then
+            // break inside of the character.
+            if (linePos + characterLength > 72 && characterLength < 72) {
+                println(out);
+                out.write(' ');
+                linePos = 1; // the continuation space just written
+            }
+            int characterPos = 0; // number of bytes of current character
+                                  // already put out
+            while (linePos + characterLength - characterPos > 72) {
+                int nextBreakPos = 72 - linePos; // bytes left on this line
+                while (isContinuation(
+                        characterBytes[characterPos + nextBreakPos])) {
+                    nextBreakPos--; // back off to a code point start
+                }
+                out.write(characterBytes, characterPos, nextBreakPos);
+                characterPos += nextBreakPos;
                 println(out);
                 out.write(' ');
+                linePos = 1; // the continuation space just written
             }
-            out.write(lineBytes, pos, length - pos);
+            out.write(characterBytes,
+                    characterPos, characterLength - characterPos);
+            linePos += characterLength - characterPos;
         }
         println(out);
     }
 
     /**

< prev index next >