# HG changeset patch # User jlaskey # Date 1558472046 10800 # Tue May 21 17:54:06 2019 -0300 # Node ID 74ce9411b7e08eb1e4ff348c0cfb83a62825bc63 # Parent 8c977741c3c88bb8847a82b7f83768079612cca4 8223775: String::stripIndent (Preview) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -36,6 +36,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Formatter; +import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -2797,11 +2798,6 @@ return indexOfNonWhitespace() == length(); } - private Stream lines(int maxLeading, int maxTrailing) { - return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing) - : StringUTF16.lines(value, maxLeading, maxTrailing); - } - /** * Returns a stream of lines extracted from this string, * separated by line terminators. @@ -2833,7 +2829,7 @@ * @since 11 */ public Stream lines() { - return lines(0, 0); + return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value); } /** @@ -2873,12 +2869,10 @@ * @since 12 */ public String indent(int n) { - return isEmpty() ? "" : indent(n, false); - } - - private String indent(int n, boolean removeBlanks) { - Stream stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE) - : lines(); + if (isEmpty()) { + return ""; + } + Stream stream = lines(); if (n > 0) { final String spaces = " ".repeat(n); stream = stream.map(s -> spaces + s); @@ -2901,6 +2895,108 @@ } /** + * Returns a string whose value is this string, with incidental white space + * removed from the beginning and end of every line. + *

+ * Incidental white space is often present in a text block to align the + * content with the opening delimiter. For example, in the following code, + * dots represent incidental white space: + *

+     * String html = """
+     * ..............<html>
+     * ..............    <body>
+     * ..............        <p>Hello, world</p>
+     * ..............    </body>
+     * ..............</html>
+     * ..............""";
+     * 
+ * This method treats the incidental white space as indentation to be + * stripped, producing a string that preserves the relative indentation of + * the content. Using | to visualize the start of each line of the string: + *
+     * |<html>
+     * |    <body>
+     * |        <p>Hello, world</p>
+     * |    </body>
+     * |</html>
+     * 
+ * First, this string is conceptually separated into lines as if by + * {@link String#lines()}. + *

+ * Then, the minimum indentation (min) is determined as follows. + * For each non-blank line (as defined by {@link String#isBlank()}), the + * leading {@link Character#isWhitespace(int) white space} characters are + * counted. The leading {@link Character#isWhitespace(int) white space} + * characters on the last line are also counted even if blank. + * The min value is the smallest of these counts. + *

+ * For each non-blank line, min leading white space characters are + * removed, and any trailing white space characters are removed. Blank lines are replaced with the empty string. + *

+ * Finally, the lines are joined with a line feed character {@code "\n"} + * (U+000A) into a single string and returned. + * + * @apiNote + * This method's primary purpose is to shift a block of lines as far as + * possible to the left, while preserving relative indentation. Lines + * that were indented the least will thus have no leading white space. + * + * @implNote + * This method treats all white space characters as having equal weight. + * As long as the indentation on every line is consistently composed + * of the same character sequences, then the result will be as described + * above. + * + * @return string with margins removed and line terminators normalized + * + * @see String#lines() + * @see String#isBlank() + * @see String#indent(int) + * @see Character#isWhitespace(int) + * + * @since 13 + * + * @deprecated Preview feature associated with Text Blocks. + * Use at your own risk. + */ + @Deprecated(forRemoval=true, since="13") + public String stripIndent() { + int length = length(); + if (length == 0) { + return ""; + } + char lastChar = charAt(length - 1); + boolean optOut = lastChar == '\n' || lastChar == '\r'; + List lines = lines().collect(Collectors.toList()); + final int outdent = optOut ? 0 : outdent(lines); + return lines.stream() + .map(line -> { + int firstNonWhitespace = line.indexOfNonWhitespace(); + int lastNonWhitespace = line.lastIndexOfNonWhitespace(); + return firstNonWhitespace > lastNonWhitespace + ? "" : line.substring(Math.min(outdent, firstNonWhitespace), lastNonWhitespace); + }) + .collect(Collectors.joining("\n", "", optOut ? "\n" : "")); + } + + private static int outdent(List lines) { + // Note: outdent is guaranteed to be zero or positive number. 
+ // If there isn't a non-blank line then the last must be blank + int outdent = Integer.MAX_VALUE; + for (String line : lines) { + int leadingWhitespace = line.indexOfNonWhitespace(); + if (leadingWhitespace != line.length()) { + outdent = Integer.min(outdent, leadingWhitespace); + } + } + String lastLine = lines.get(lines.size() - 1); + if (lastLine.isBlank()) { + outdent = Integer.min(outdent, lastLine.length()); + } + return outdent; + } + + /** * This method allows the application of a function to {@code this} * string. The function should expect a single String argument * and produce an {@code R} result. diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java --- a/src/java.base/share/classes/java/lang/StringLatin1.java +++ b/src/java.base/share/classes/java/lang/StringLatin1.java @@ -747,76 +747,10 @@ static LinesSpliterator spliterator(byte[] value) { return new LinesSpliterator(value, 0, value.length); } - - static LinesSpliterator spliterator(byte[] value, int leading, int trailing) { - int length = value.length; - int left = 0; - int index; - for (int l = 0; l < leading; l++) { - index = skipBlankForward(value, left, length); - if (index == left) { - break; - } - left = index; - } - int right = length; - for (int t = 0; t < trailing; t++) { - index = skipBlankBackward(value, left, right); - if (index == right) { - break; - } - right = index; - } - return new LinesSpliterator(value, left, right - left); - } - - private static int skipBlankForward(byte[] value, int start, int length) { - int index = start; - while (index < length) { - char ch = getChar(value, index++); - if (ch == '\n') { - return index; - } - if (ch == '\r') { - if (index < length && getChar(value, index) == '\n') { - return index + 1; - } - return index; - } - if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { - return start; - } - } - return length; - } - - private static int skipBlankBackward(byte[] 
value, int start, int fence) { - int index = fence; - if (start < index && getChar(value, index - 1) == '\n') { - index--; - } - if (start < index && getChar(value, index - 1) == '\r') { - index--; - } - while (start < index) { - char ch = getChar(value, --index); - if (ch == '\r' || ch == '\n') { - return index + 1; - } - if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { - return fence; - } - } - return start; - } } - static Stream lines(byte[] value, int leading, int trailing) { - if (leading == 0 && trailing == 0) { - return StreamSupport.stream(LinesSpliterator.spliterator(value), false); - } else { - return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false); - } + static Stream lines(byte[] value) { + return StreamSupport.stream(LinesSpliterator.spliterator(value), false); } public static void putChar(byte[] val, int index, int c) { diff --git a/src/java.base/share/classes/java/lang/StringUTF16.java b/src/java.base/share/classes/java/lang/StringUTF16.java --- a/src/java.base/share/classes/java/lang/StringUTF16.java +++ b/src/java.base/share/classes/java/lang/StringUTF16.java @@ -1126,76 +1126,10 @@ static LinesSpliterator spliterator(byte[] value) { return new LinesSpliterator(value, 0, value.length >>> 1); } - - static LinesSpliterator spliterator(byte[] value, int leading, int trailing) { - int length = value.length >>> 1; - int left = 0; - int index; - for (int l = 0; l < leading; l++) { - index = skipBlankForward(value, left, length); - if (index == left) { - break; - } - left = index; - } - int right = length; - for (int t = 0; t < trailing; t++) { - index = skipBlankBackward(value, left, right); - if (index == right) { - break; - } - right = index; - } - return new LinesSpliterator(value, left, right - left); - } - - private static int skipBlankForward(byte[] value, int start, int length) { - int index = start; - while (index < length) { - char ch = getChar(value, index++); - if (ch == '\n') { - return index; 
- } - if (ch == '\r') { - if (index < length && getChar(value, index) == '\n') { - return index + 1; - } - return index; - } - if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { - return start; - } - } - return length; - } - - private static int skipBlankBackward(byte[] value, int start, int fence) { - int index = fence; - if (start < index && getChar(value, index - 1) == '\n') { - index--; - } - if (start < index && getChar(value, index - 1) == '\r') { - index--; - } - while (start < index) { - char ch = getChar(value, --index); - if (ch == '\r' || ch == '\n') { - return index + 1; - } - if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { - return fence; - } - } - return start; - } } - static Stream lines(byte[] value, int leading, int trailing) { - if (leading == 0 && trailing == 0) { - return StreamSupport.stream(LinesSpliterator.spliterator(value), false); - } else { - return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false); - } + static Stream lines(byte[] value) { + return StreamSupport.stream(LinesSpliterator.spliterator(value), false); } private static void putChars(byte[] val, int index, char[] str, int off, int end) { diff --git a/test/jdk/java/lang/String/StripIndent.java b/test/jdk/java/lang/String/StripIndent.java new file mode 100644 --- /dev/null +++ b/test/jdk/java/lang/String/StripIndent.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary This exercises String#stripIndent patterns and limits. + * @run main/othervm -Xmx2g StripIndent + */ + +public class StripIndent { + public static void main(String... arg) { + test1(); + } + + /* + * Case combinations. + */ + static void test1() { + verify("", ""); + verify("abc", "abc"); + verify(" abc", "abc"); + verify("abc ", "abc"); + verify(" abc\n def\n ", "abc\ndef\n"); + verify(" abc\n def\n", " abc\n def\n"); + verify(" abc\n def", "abc\ndef"); + verify(" abc\n def\n ", "abc\n def\n"); + } + + static void verify(String a, String b) { + if (!a.stripIndent().equals(b)) { + System.err.format("\"%s\" not equal \"%s\"%n", a, b); + throw new RuntimeException(); + } + } +} + +