< prev index next >

src/java.base/share/classes/java/lang/String.java

Print this page
rev 54939 : 8223775: String::stripIndent (Preview)


  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.invoke.MethodHandles;
  32 import java.lang.constant.Constable;
  33 import java.lang.constant.ConstantDesc;
  34 import java.nio.charset.Charset;
  35 import java.util.ArrayList;
  36 import java.util.Arrays;
  37 import java.util.Comparator;
  38 import java.util.Formatter;

  39 import java.util.Locale;
  40 import java.util.Objects;
  41 import java.util.Optional;
  42 import java.util.Spliterator;
  43 import java.util.StringJoiner;
  44 import java.util.function.Function;
  45 import java.util.regex.Matcher;
  46 import java.util.regex.Pattern;
  47 import java.util.regex.PatternSyntaxException;
  48 import java.util.stream.Collectors;
  49 import java.util.stream.IntStream;
  50 import java.util.stream.Stream;
  51 import java.util.stream.StreamSupport;
  52 import jdk.internal.HotSpotIntrinsicCandidate;
  53 import jdk.internal.vm.annotation.Stable;
  54 
  55 import static java.util.function.Predicate.not;
  56 
  57 /**
  58  * The {@code String} class represents character strings. All


2780         return ret == null ? this : ret;
2781     }
2782 
2783     /**
2784      * Returns {@code true} if the string is empty or contains only
2785      * {@link Character#isWhitespace(int) white space} codepoints,
2786      * otherwise {@code false}.
2787      *
2788      * @return {@code true} if the string is empty or contains only
2789      *         {@link Character#isWhitespace(int) white space} codepoints,
2790      *         otherwise {@code false}
2791      *
2792      * @see Character#isWhitespace(int)
2793      *
2794      * @since 11
2795      */
2796     public boolean isBlank() {
2797         return indexOfNonWhitespace() == length();
2798     }
2799 
2800     private Stream<String> lines(int maxLeading, int maxTrailing) {
2801         return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing)
2802                           : StringUTF16.lines(value, maxLeading, maxTrailing);
2803     }
2804 
2805     /**
2806      * Returns a stream of lines extracted from this string,
2807      * separated by line terminators.
2808      * <p>
2809      * A <i>line terminator</i> is one of the following:
2810      * a line feed character {@code "\n"} (U+000A),
2811      * a carriage return character {@code "\r"} (U+000D),
2812      * or a carriage return followed immediately by a line feed
2813      * {@code "\r\n"} (U+000D U+000A).
2814      * <p>
2815      * A <i>line</i> is either a sequence of zero or more characters
2816      * followed by a line terminator, or it is a sequence of one or
2817      * more characters followed by the end of the string. A
2818      * line does not include the line terminator.
2819      * <p>
2820      * The stream returned by this method contains the lines from
2821      * this string in the order in which they occur.
2822      *
2823      * @apiNote This definition of <i>line</i> implies that an empty
2824      *          string has zero lines and that there is no empty line
2825      *          following a line terminator at the end of a string.
2826      *
2827      * @implNote This method provides better performance than
2828      *           split("\R") by supplying elements lazily and
2829      *           by faster search of new line terminators.
2830      *
2831      * @return  the stream of lines extracted from this string
2832      *
2833      * @since 11
2834      */
2835     public Stream<String> lines() {
2836         return lines(0, 0);
2837     }
2838 
2839     /**
2840      * Adjusts the indentation of each line of this string based on the value of
2841      * {@code n}, and normalizes line termination characters.
2842      * <p>
2843      * This string is conceptually separated into lines using
2844      * {@link String#lines()}. Each line is then adjusted as described below
2845      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2846      * lines are then concatenated and returned.
2847      * <p>
2848      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2849      * beginning of each line.
2850      * <p>
2851      * If {@code n < 0} then up to {@code n}
2852      * {@link Character#isWhitespace(int) white space characters} are removed
2853      * from the beginning of each line. If a given line does not contain
2854      * sufficient white space then all leading
2855      * {@link Character#isWhitespace(int) white space characters} are removed.
2856      * Each white space character is treated as a single character. In
2857      * particular, the tab character {@code "\t"} (U+0009) is considered a
2858      * single character; it is not expanded.
2859      * <p>
2860      * If {@code n == 0} then the line remains unchanged. However, line
2861      * terminators are still normalized.
2862      *
2863      * @param n  number of leading
2864      *           {@link Character#isWhitespace(int) white space characters}
2865      *           to add or remove
2866      *
2867      * @return string with indentation adjusted and line endings normalized
2868      *
2869      * @see String#lines()
2870      * @see String#isBlank()
2871      * @see Character#isWhitespace(int)
2872      *
2873      * @since 12
2874      */
2875     public String indent(int n) {
2876         return isEmpty() ? "" :  indent(n, false);

2877     }
2878 
2879     private String indent(int n, boolean removeBlanks) {
2880         Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
2881                                              : lines();
2882         if (n > 0) {
2883             final String spaces = " ".repeat(n);
2884             stream = stream.map(s -> spaces + s);
2885         } else if (n == Integer.MIN_VALUE) {
2886             stream = stream.map(s -> s.stripLeading());
2887         } else if (n < 0) {
2888             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
2889         }
2890         return stream.collect(Collectors.joining("\n", "", "\n"));
2891     }
2892 
2893     private int indexOfNonWhitespace() {
2894         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
2895                           : StringUTF16.indexOfNonWhitespace(value);
2896     }
2897 
2898     private int lastIndexOfNonWhitespace() {
2899         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
2900                           : StringUTF16.lastIndexOfNonWhitespace(value);
2901     }
2902 
2903     /**






































































































2904      * This method allows the application of a function to {@code this}
2905      * string. The function should expect a single String argument
2906      * and produce an {@code R} result.
2907      * <p>
2908      * Any exception thrown by {@code f()} will be propagated to the
2909      * caller.
2910      *
2911      * @param f    functional interface to apply
2912      *
2913      * @param <R>  class of the result
2914      *
2915      * @return     the result of applying the function to this string
2916      *
2917      * @see java.util.function.Function
2918      *
2919      * @since 12
2920      */
2921     public <R> R transform(Function<? super String, ? extends R> f) {
2922         return f.apply(this);
2923     }




  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.invoke.MethodHandles;
  32 import java.lang.constant.Constable;
  33 import java.lang.constant.ConstantDesc;
  34 import java.nio.charset.Charset;
  35 import java.util.ArrayList;
  36 import java.util.Arrays;
  37 import java.util.Comparator;
  38 import java.util.Formatter;
  39 import java.util.List;
  40 import java.util.Locale;
  41 import java.util.Objects;
  42 import java.util.Optional;
  43 import java.util.Spliterator;
  44 import java.util.StringJoiner;
  45 import java.util.function.Function;
  46 import java.util.regex.Matcher;
  47 import java.util.regex.Pattern;
  48 import java.util.regex.PatternSyntaxException;
  49 import java.util.stream.Collectors;
  50 import java.util.stream.IntStream;
  51 import java.util.stream.Stream;
  52 import java.util.stream.StreamSupport;
  53 import jdk.internal.HotSpotIntrinsicCandidate;
  54 import jdk.internal.vm.annotation.Stable;
  55 
  56 import static java.util.function.Predicate.not;
  57 
  58 /**
  59  * The {@code String} class represents character strings. All


2781         return ret == null ? this : ret;
2782     }
2783 
2784     /**
2785      * Returns {@code true} if the string is empty or contains only
2786      * {@link Character#isWhitespace(int) white space} codepoints,
2787      * otherwise {@code false}.
2788      *
2789      * @return {@code true} if the string is empty or contains only
2790      *         {@link Character#isWhitespace(int) white space} codepoints,
2791      *         otherwise {@code false}
2792      *
2793      * @see Character#isWhitespace(int)
2794      *
2795      * @since 11
2796      */
2797     public boolean isBlank() {
2798         return indexOfNonWhitespace() == length();
2799     }
2800 





2801     /**
2802      * Returns a stream of lines extracted from this string,
2803      * separated by line terminators.
2804      * <p>
2805      * A <i>line terminator</i> is one of the following:
2806      * a line feed character {@code "\n"} (U+000A),
2807      * a carriage return character {@code "\r"} (U+000D),
2808      * or a carriage return followed immediately by a line feed
2809      * {@code "\r\n"} (U+000D U+000A).
2810      * <p>
2811      * A <i>line</i> is either a sequence of zero or more characters
2812      * followed by a line terminator, or it is a sequence of one or
2813      * more characters followed by the end of the string. A
2814      * line does not include the line terminator.
2815      * <p>
2816      * The stream returned by this method contains the lines from
2817      * this string in the order in which they occur.
2818      *
2819      * @apiNote This definition of <i>line</i> implies that an empty
2820      *          string has zero lines and that there is no empty line
2821      *          following a line terminator at the end of a string.
2822      *
2823      * @implNote This method provides better performance than
2824      *           split("\R") by supplying elements lazily and
2825      *           by faster search of new line terminators.
2826      *
2827      * @return  the stream of lines extracted from this string
2828      *
2829      * @since 11
2830      */
2831     public Stream<String> lines() {
2832         return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
2833     }
2834 
2835     /**
2836      * Adjusts the indentation of each line of this string based on the value of
2837      * {@code n}, and normalizes line termination characters.
2838      * <p>
2839      * This string is conceptually separated into lines using
2840      * {@link String#lines()}. Each line is then adjusted as described below
2841      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2842      * lines are then concatenated and returned.
2843      * <p>
2844      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2845      * beginning of each line.
2846      * <p>
2847      * If {@code n < 0} then up to {@code n}
2848      * {@link Character#isWhitespace(int) white space characters} are removed
2849      * from the beginning of each line. If a given line does not contain
2850      * sufficient white space then all leading
2851      * {@link Character#isWhitespace(int) white space characters} are removed.
2852      * Each white space character is treated as a single character. In
2853      * particular, the tab character {@code "\t"} (U+0009) is considered a
2854      * single character; it is not expanded.
2855      * <p>
2856      * If {@code n == 0} then the line remains unchanged. However, line
2857      * terminators are still normalized.
2858      *
2859      * @param n  number of leading
2860      *           {@link Character#isWhitespace(int) white space characters}
2861      *           to add or remove
2862      *
2863      * @return string with indentation adjusted and line endings normalized
2864      *
2865      * @see String#lines()
2866      * @see String#isBlank()
2867      * @see Character#isWhitespace(int)
2868      *
2869      * @since 12
2870      */
2871     public String indent(int n) {
2872         if (isEmpty()) {
2873             return "";
2874         }
2875         Stream<String> stream = lines();



2876         if (n > 0) {
2877             final String spaces = " ".repeat(n);
2878             stream = stream.map(s -> spaces + s);
2879         } else if (n == Integer.MIN_VALUE) {
2880             stream = stream.map(s -> s.stripLeading());
2881         } else if (n < 0) {
2882             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
2883         }
2884         return stream.collect(Collectors.joining("\n", "", "\n"));
2885     }
2886 
2887     private int indexOfNonWhitespace() {
2888         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
2889                           : StringUTF16.indexOfNonWhitespace(value);
2890     }
2891 
2892     private int lastIndexOfNonWhitespace() {
2893         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
2894                           : StringUTF16.lastIndexOfNonWhitespace(value);
2895     }
2896 
2897     /**
2898      * Returns a string whose value is this string, with incidental white space
2899      * removed from the beginning and end of every line.
2900      * <p>
2901      * Incidental white space is often present in a text block to align the
2902      * content with the opening delimiter. For example, in the following code,
2903      * dots represent incidental white space:
2904      * <blockquote><pre>
2905      * String html = """
2906      * ..............&lt;html&gt;
2907      * ..............    &lt;body&gt;
2908      * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
2909      * ..............    &lt;/body&gt;
2910      * ..............&lt;/html&gt;
2911      * ..............""";
2912      * </pre></blockquote>
2913      * This method treats the incidental white space as indentation to be
2914      * stripped, producing a string that preserves the relative indentation of
2915      * the content. Using | to visualize the start of each line of the string:
2916      * <blockquote><pre>
2917      * |&lt;html&gt;
2918      * |    &lt;body&gt;
2919      * |        &lt;p&gt;Hello, world&lt;/p&gt;
2920      * |    &lt;/body&gt;
2921      * |&lt;/html&gt;
2922      * </pre></blockquote>
2923      * First, this string is conceptually separated into lines as if by
2924      * {@link String#lines()}.
2925      * <p>
2926      * Then, the <i>minimum indentation</i> (min) is determined as follows.
2927      * For each non-blank line (as defined by {@link String#isBlank()}), the
2928      * leading {@link Character#isWhitespace(int) white space} characters are
2929      * counted. The leading {@link Character#isWhitespace(int) white space}
2930      * characters on the last line are also counted even if blank.
2931      * The <i>min</i> value is the smallest of these counts.
2932      * <p>
2933      * For each non-blank line, <i>min</i> leading white space characters are
2934      * removed, and any trailing white space characters are removed.
2935      * <p>
2936      * Finally, the lines are joined with a line feed character {@code "\n"}
2937      * (U+000A) into a single string and returned.
2938      *
2939      * @apiNote
2940      * This method's primary purpose is to shift a block of lines as far as
2941      * possible to the left, while preserving relative indentation. Lines
2942      * that were indented the least will thus have no leading white space.
2943      *
2944      * @implNote
2945      * This method treats all white space characters as having equal weight.
2946      * As long as the indentation on every line is consistently composed
2947      * of the same character sequences, then the result will be as described
2948      * above.
2949      *
2950      * @return string with margins removed and line terminators normalized
2951      *
2952      * @see String#lines()
2953      * @see String#isBlank()
2954      * @see String#indent(int)
2955      * @see Character#isWhitespace(int)
2956      *
2957      * @since 13
2958      *
2959      * @deprecated  Preview feature associated with Text Blocks.
2960      *              Use at your own risk.
2961      */
2962     @Deprecated(forRemoval=true, since="13")
2963     public String stripIndent() {
2964         int length = length();
2965         if (length == 0) {
2966             return "";
2967         }
2968         char lastChar = charAt(length - 1);
2969         boolean optOut = lastChar == '\n' || lastChar == '\r';
2970         List<String> lines = lines().collect(Collectors.toList());
2971         final int outdent = optOut ? 0 : outdent(lines);
2972         return lines.stream()
2973             .map(line -> {
2974                 int firstNonWhitespace = line.indexOfNonWhitespace();
2975                 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
2976                 return firstNonWhitespace > lastNonWhitespace
2977                     ? "" : line.substring(Math.min(outdent, firstNonWhitespace), lastNonWhitespace);
2978             })
2979             .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
2980     }
2981 
2982     private static int outdent(List<String> lines) {
2983         // Note: outdent is guaranteed to be zero or positive number.
2984         // If there isn't a non-blank line then the last must be blank
2985         int outdent = Integer.MAX_VALUE;
2986         for (String line : lines) {
2987             int leadingWhitespace = line.indexOfNonWhitespace();
2988             if (leadingWhitespace != line.length()) {
2989                 outdent = Integer.min(outdent, leadingWhitespace);
2990             }
2991         }
2992         String lastLine = lines.get(lines.size() - 1);
2993         if (lastLine.isBlank()) {
2994             outdent = Integer.min(outdent, lastLine.length());
2995         }
2996         return outdent;
2997     }
2998 
2999     /**
3000      * This method allows the application of a function to {@code this}
3001      * string. The function should expect a single String argument
3002      * and produce an {@code R} result.
3003      * <p>
3004      * Any exception thrown by {@code f()} will be propagated to the
3005      * caller.
3006      *
3007      * @param f    functional interface to apply
3008      *
3009      * @param <R>  class of the result
3010      *
3011      * @return     the result of applying the function to this string
3012      *
3013      * @see java.util.function.Function
3014      *
3015      * @since 12
3016      */
3017     public <R> R transform(Function<? super String, ? extends R> f) {
3018         return f.apply(this);
3019     }


< prev index next >