< prev index next >

src/java.base/share/classes/java/lang/String.java

Print this page
rev 51519 : 8200434: String::align, String::indent
Reviewed-by: smarks


  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.nio.charset.Charset;
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.Comparator;
  35 import java.util.Formatter;
  36 import java.util.Locale;
  37 import java.util.Objects;
  38 import java.util.Spliterator;
  39 import java.util.StringJoiner;
  40 import java.util.regex.Matcher;
  41 import java.util.regex.Pattern;
  42 import java.util.regex.PatternSyntaxException;

  43 import java.util.stream.IntStream;
  44 import java.util.stream.Stream;
  45 import java.util.stream.StreamSupport;
  46 import jdk.internal.HotSpotIntrinsicCandidate;
  47 import jdk.internal.vm.annotation.Stable;
  48 


  49 /**
  50  * The {@code String} class represents character strings. All
  51  * string literals in Java programs, such as {@code "abc"}, are
  52  * implemented as instances of this class.
  53  * <p>
  54  * Strings are constant; their values cannot be changed after they
  55  * are created. String buffers support mutable strings.
  56  * Because String objects are immutable they can be shared. For example:
  57  * <blockquote><pre>
  58  *     String str = "abc";
  59  * </pre></blockquote><p>
  60  * is equivalent to:
  61  * <blockquote><pre>
  62  *     char data[] = {'a', 'b', 'c'};
  63  *     String str = new String(data);
  64  * </pre></blockquote><p>
  65  * Here are some more examples of how strings can be used:
  66  * <blockquote><pre>
  67  *     System.out.println("abc");
  68  *     String cde = "cde";


2738         return ret == null ? this : ret;
2739     }
2740 
2741     /**
2742      * Returns {@code true} if the string is empty or contains only
2743      * {@link Character#isWhitespace(int) white space} codepoints,
2744      * otherwise {@code false}.
2745      *
2746      * @return {@code true} if the string is empty or contains only
2747      *         {@link Character#isWhitespace(int) white space} codepoints,
2748      *         otherwise {@code false}
2749      *
2750      * @see Character#isWhitespace(int)
2751      *
2752      * @since 11
2753      */
2754     public boolean isBlank() {
2755         return length() == indexOfNonWhitespace(); // true when the scan reaches the end without a non-white-space hit
2756     }
2757 
2758     private int indexOfNonWhitespace() {
2759         if (!isLatin1()) {
2760             return StringUTF16.indexOfNonWhitespace(value);
2761         }
2762         // Latin-1 coded value: use the single-byte scanner.
2763         return StringLatin1.indexOfNonWhitespace(value);
2764     }
2765 
2766     /**
2767      * Returns a stream of lines extracted from this string,
2768      * separated by line terminators.
2769      * <p>
2770      * A <i>line terminator</i> is one of the following:
2771      * a line feed character {@code "\n"} (U+000A),
2772      * a carriage return character {@code "\r"} (U+000D),
2773      * or a carriage return followed immediately by a line feed
2774      * {@code "\r\n"} (U+000D U+000A).
2775      * <p>
2776      * A <i>line</i> is either a sequence of zero or more characters
2777      * followed by a line terminator, or it is a sequence of one or
2778      * more characters followed by the end of the string. A
2779      * line does not include the line terminator.
2780      * <p>
2781      * The stream returned by this method contains the lines from
2782      * this string in the order in which they occur.
2783      *
2784      * @apiNote This definition of <i>line</i> implies that an empty
2785      *          string has zero lines and that there is no empty line
2786      *          following a line terminator at the end of a string.
2787      *
2788      * @implNote This method provides better performance than
2789      *           {@code split("\\R")} by supplying elements lazily and
2790      *           by faster search of new line terminators.
2791      *
2792      * @return  the stream of lines extracted from this string
2793      *
2794      * @since 11
2795      */
2796     public Stream<String> lines() {
2797         final boolean latin1 = isLatin1(); // pick the splitter that matches this string's coder
2798         return latin1 ? StringLatin1.lines(value) : StringUTF16.lines(value);
2799     }
2800 
2801     /**
2802      * Returns this very instance; no copy or conversion is made.
2803      *
2804      * @return  the string itself.
2805      */
2806     public String toString() {
2807         return this;
2808     }
2809 
2810     /**
2811      * Returns a stream of {@code int} zero-extending the {@code char} values
2812      * from this sequence.  Any char which maps to a <a
2813      * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code
2814      * point</a> is passed through uninterpreted.
2815      *
2816      * @return an IntStream of char values from this sequence
2817      * @since 9
2818      */




  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.nio.charset.Charset;
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.Comparator;
  35 import java.util.Formatter;
  36 import java.util.Locale;
  37 import java.util.Objects;
  38 import java.util.Spliterator;
  39 import java.util.StringJoiner;
  40 import java.util.regex.Matcher;
  41 import java.util.regex.Pattern;
  42 import java.util.regex.PatternSyntaxException;
  43 import java.util.stream.Collectors;
  44 import java.util.stream.IntStream;
  45 import java.util.stream.Stream;
  46 import java.util.stream.StreamSupport;
  47 import jdk.internal.HotSpotIntrinsicCandidate;
  48 import jdk.internal.vm.annotation.Stable;
  49 
  50 import static java.util.function.Predicate.not;
  51 
  52 /**
  53  * The {@code String} class represents character strings. All
  54  * string literals in Java programs, such as {@code "abc"}, are
  55  * implemented as instances of this class.
  56  * <p>
  57  * Strings are constant; their values cannot be changed after they
  58  * are created. String buffers support mutable strings.
  59  * Because String objects are immutable they can be shared. For example:
  60  * <blockquote><pre>
  61  *     String str = "abc";
  62  * </pre></blockquote><p>
  63  * is equivalent to:
  64  * <blockquote><pre>
  65  *     char data[] = {'a', 'b', 'c'};
  66  *     String str = new String(data);
  67  * </pre></blockquote><p>
  68  * Here are some more examples of how strings can be used:
  69  * <blockquote><pre>
  70  *     System.out.println("abc");
  71  *     String cde = "cde";


2741         return ret == null ? this : ret;
2742     }
2743 
2744     /**
2745      * Returns {@code true} if the string is empty or contains only
2746      * {@link Character#isWhitespace(int) white space} codepoints,
2747      * otherwise {@code false}.
2748      *
2749      * @return {@code true} if the string is empty or contains only
2750      *         {@link Character#isWhitespace(int) white space} codepoints,
2751      *         otherwise {@code false}
2752      *
2753      * @see Character#isWhitespace(int)
2754      *
2755      * @since 11
2756      */
2757     public boolean isBlank() {
2758         return length() == indexOfNonWhitespace(); // true when the scan reaches the end without a non-white-space hit
2759     }
2760 
2761     private Stream<String> lines(int maxLeading, int maxTrailing) {
2762         return !isLatin1() ? StringUTF16.lines(value, maxLeading, maxTrailing)
2763                            : StringLatin1.lines(value, maxLeading, maxTrailing);
2764     }
2765 
2766     /**
2767      * Returns a stream of lines extracted from this string,
2768      * separated by line terminators.
2769      * <p>
2770      * A <i>line terminator</i> is one of the following:
2771      * a line feed character {@code "\n"} (U+000A),
2772      * a carriage return character {@code "\r"} (U+000D),
2773      * or a carriage return followed immediately by a line feed
2774      * {@code "\r\n"} (U+000D U+000A).
2775      * <p>
2776      * A <i>line</i> is either a sequence of zero or more characters
2777      * followed by a line terminator, or it is a sequence of one or
2778      * more characters followed by the end of the string. A
2779      * line does not include the line terminator.
2780      * <p>
2781      * The stream returned by this method contains the lines from
2782      * this string in the order in which they occur.
2783      *
2784      * @apiNote This definition of <i>line</i> implies that an empty
2785      *          string has zero lines and that there is no empty line
2786      *          following a line terminator at the end of a string.
2787      *
2788      * @implNote This method provides better performance than
2789      *           {@code split("\\R")} by supplying elements lazily and
2790      *           by faster search of new line terminators.
2791      *
2792      * @return  the stream of lines extracted from this string
2793      *
2794      * @since 11
2795      */
2796     public Stream<String> lines() {
2797         return lines(0, 0); // maxLeading = 0, maxTrailing = 0 (presumably: strip no blank lines; compare indent(int, boolean))
2798     }
2799 
2800     /**
2801      * Adjusts the indentation of each line of this string based on the value of
2802      * {@code n}, and normalizes line termination characters.
2803      * <p>
2804      * This string is conceptually separated into lines using
2805      * {@link String#lines()}. Each line is then adjusted as described below
2806      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2807      * lines are then concatenated and returned.
2808      * <p>
2809      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2810      * beginning of each line. {@link String#isBlank() Blank lines} are
2811      * unaffected.
2812      * <p>
2813      * If {@code n < 0} then up to {@code -n}
2814      * {@link Character#isWhitespace(int) white space characters} are removed
2815      * from the beginning of each line. If a given line does not contain
2816      * sufficient white space then all leading
2817      * {@link Character#isWhitespace(int) white space characters} are removed.
2818      * Each white space character is treated as a single character. In
2819      * particular, the tab character {@code "\t"} (U+0009) is considered a
2820      * single character; it is not expanded.
2821      * <p>
2822      * If {@code n == 0} then the line remains unchanged. However, line
2823      * terminators are still normalized.
2824      *
2825      *
2826      * @param n  number of leading
2827      *           {@link Character#isWhitespace(int) white space characters}
2828      *           to add or remove
2829      *
2830      * @return string with indentation adjusted and line endings normalized
2831      *
2832      * @see String#lines()
2833      * @see String#isBlank()
2834      * @see Character#isWhitespace(int)
2835      *
2836      * @since 12
2837      */
2838     public String indent(int n) {
2839         return !isEmpty() ? indent(n, false) : ""; // an empty string has no lines and stays empty
2840     }
2841 
2842     private String indent(int n, boolean removeBlanks) {
2843         final int blankLimit = removeBlanks ? Integer.MAX_VALUE : 0; // lines() is lines(0, 0)
2844         Stream<String> rows = lines(blankLimit, blankLimit);
2845         if (n == Integer.MIN_VALUE) {
2846             rows = rows.map(String::stripLeading); // -n is not representable here, so strip everything
2847         } else if (n < 0) {
2848             rows = rows.map(row -> row.substring(Math.min(-n, row.indexOfNonWhitespace())));
2849         } else if (n > 0) {
2850             final String pad = " ".repeat(n);
2851             rows = rows.map(row -> row.isBlank() ? row : pad + row);
2852         }
2853         return rows.collect(Collectors.joining("\n", "", "\n")); // every line, including the last, ends in \n
2854     }
2855 
2856     private int indexOfNonWhitespace() {
2857         return !isLatin1() ? StringUTF16.indexOfNonWhitespace(value)
2858                            : StringLatin1.indexOfNonWhitespace(value);
2859     }
2860 
2861     private int lastIndexOfNonWhitespace() {
2862         return !isLatin1() ? StringUTF16.lastIndexOfNonWhitespace(value)
2863                            : StringLatin1.lastIndexOfNonWhitespace(value);
2864     }
2865 
2866     /**
2867      * Removes vertical and horizontal white space margins from around the
2868      * essential body of a multi-line string, while preserving relative
2869      * indentation.
2870      * <p>
2871      * This string is first conceptually separated into lines as if by
2872      * {@link String#lines()}.
2873      * <p>
2874      * Then, the <i>minimum indentation</i> (min) is determined as follows. For
2875      * each non-blank line (as defined by {@link String#isBlank()}), the
2876      * leading {@link Character#isWhitespace(int) white space} characters are
2877      * counted. The <i>min</i> value is the smallest of these counts.
2878      * <p>
2879      * For each non-blank line, <i>min</i> leading white space characters are
2880      * removed. Each white space character is treated as a single character. In
2881      * particular, the tab character {@code "\t"} (U+0009) is considered a
2882      * single character; it is not expanded.
2883      * <p>
2884      * Leading and trailing blank lines, if any, are removed. Trailing spaces are
2885      * preserved.
2886      * <p>
2887      * Each line is suffixed with a line feed character {@code "\n"} (U+000A).
2888      * <p>
2889      * Finally, the lines are concatenated into a single string and returned.
2890      *
2891      * @apiNote
2892      * This method's primary purpose is to shift a block of lines as far as
2893      * possible to the left, while preserving relative indentation. Lines
2894      * that were indented the least will thus have no leading white space.
2895      *
2896      * Example:
2897      * <blockquote><pre>
2898      * `
2899      *      This is the first line
2900      *          This is the second line
2901      * `.align();
2902      *
2903      * returns
2904      * This is the first line
2905      *     This is the second line
2906      * </pre></blockquote>
2907      *
2908      * @return string with margins removed and line terminators normalized
2909      *
2910      * @see String#lines()
2911      * @see String#isBlank()
2912      * @see String#indent(int)
2913      * @see Character#isWhitespace(int)
2914      *
2915      * @since 12
2916      */
2917     public String align() {
2918         return align(0); // delegate to align(int) with n = 0
2919     }
2920 
2921     /**
2922      * Removes vertical and horizontal white space margins from around the
2923      * essential body of a multi-line string, while preserving relative
2924      * indentation and with optional indentation adjustment.
2925      * <p>
2926      * Invoking this method is equivalent to:
2927      * <blockquote>
2928      *  {@code this.align().indent(n)}
2929      * </blockquote>
2930      *
2931      * @apiNote
2932      * Examples:
2933      * <blockquote><pre>
2934      * `
2935      *      This is the first line
2936      *          This is the second line
2937      * `.align(0);
2938      *
2939      * returns
2940      * This is the first line
2941      *     This is the second line
2942      *
2943      *
2944      * `
2945      *    This is the first line
2946      *       This is the second line
2947      * `.align(4);
2948      * returns
2949      *     This is the first line
2950      *         This is the second line
2951      * </pre></blockquote>
2952      *
2953      * @param n  number of leading white space characters
2954      *           to add or remove
2955      *
2956      * @return string with margins removed, indentation adjusted and
2957      *         line terminators normalized
2958      *
2959      * @see String#align()
2960      *
2961      * @since 12
2962      */
2963     public String align(int n) {
2964         if (isEmpty()) {
2965             return ""; // zero lines in, zero lines out
2966         }
2967         int outdent = lines().filter(not(String::isBlank)) // blank lines do not influence the margin
2968                              .mapToInt(String::indexOfNonWhitespace)
2969                              .min().orElse(0);
2970         String aligned = indent(-outdent, true); // own pass: n - outdent can overflow, e.g. n == Integer.MIN_VALUE
2971         return n == 0 ? aligned : aligned.indent(n); // literally align().indent(n), as the Javadoc promises
2972     }
2973 
2974     /**
2975      * Returns this very instance; no copy or conversion is made.
2976      *
2977      * @return  the string itself.
2978      */
2979     public String toString() {
2980         return this;
2981     }
2982 
2983     /**
2984      * Returns a stream of {@code int} zero-extending the {@code char} values
2985      * from this sequence.  Any char which maps to a <a
2986      * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code
2987      * point</a> is passed through uninterpreted.
2988      *
2989      * @return an IntStream of char values from this sequence
2990      * @since 9
2991      */


< prev index next >