19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.io.ObjectStreamField;
29 import java.io.UnsupportedEncodingException;
30 import java.lang.annotation.Native;
31 import java.lang.invoke.MethodHandles;
32 import java.lang.constant.Constable;
33 import java.lang.constant.ConstantDesc;
34 import java.nio.charset.Charset;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.Comparator;
38 import java.util.Formatter;
39 import java.util.Locale;
40 import java.util.Objects;
41 import java.util.Optional;
42 import java.util.Spliterator;
43 import java.util.StringJoiner;
44 import java.util.function.Function;
45 import java.util.regex.Matcher;
46 import java.util.regex.Pattern;
47 import java.util.regex.PatternSyntaxException;
48 import java.util.stream.Collectors;
49 import java.util.stream.IntStream;
50 import java.util.stream.Stream;
51 import java.util.stream.StreamSupport;
52 import jdk.internal.HotSpotIntrinsicCandidate;
53 import jdk.internal.vm.annotation.Stable;
54
55 import static java.util.function.Predicate.not;
56
57 /**
58 * The {@code String} class represents character strings. All
2780 return ret == null ? this : ret;
2781 }
2782
2783 /**
2784 * Returns {@code true} if the string is empty or contains only
2785 * {@link Character#isWhitespace(int) white space} codepoints,
2786 * otherwise {@code false}.
2787 *
2788 * @return {@code true} if the string is empty or contains only
2789 * {@link Character#isWhitespace(int) white space} codepoints,
2790 * otherwise {@code false}
2791 *
2792 * @see Character#isWhitespace(int)
2793 *
2794 * @since 11
2795 */
2796 public boolean isBlank() {
2797 return indexOfNonWhitespace() == length();
2798 }
2799
2800 private Stream<String> lines(int maxLeading, int maxTrailing) {
2801 return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing)
2802 : StringUTF16.lines(value, maxLeading, maxTrailing);
2803 }
2804
2805 /**
2806 * Returns a stream of lines extracted from this string,
2807 * separated by line terminators.
2808 * <p>
2809 * A <i>line terminator</i> is one of the following:
2810 * a line feed character {@code "\n"} (U+000A),
2811 * a carriage return character {@code "\r"} (U+000D),
2812 * or a carriage return followed immediately by a line feed
2813 * {@code "\r\n"} (U+000D U+000A).
2814 * <p>
2815 * A <i>line</i> is either a sequence of zero or more characters
2816 * followed by a line terminator, or it is a sequence of one or
2817 * more characters followed by the end of the string. A
2818 * line does not include the line terminator.
2819 * <p>
2820 * The stream returned by this method contains the lines from
2821 * this string in the order in which they occur.
2822 *
2823 * @apiNote This definition of <i>line</i> implies that an empty
2824 * string has zero lines and that there is no empty line
2825 * following a line terminator at the end of a string.
2826 *
2827 * @implNote This method provides better performance than
2828 * split("\R") by supplying elements lazily and
2829 * by faster search of new line terminators.
2830 *
2831 * @return the stream of lines extracted from this string
2832 *
2833 * @since 11
2834 */
2835 public Stream<String> lines() {
2836 return lines(0, 0);
2837 }
2838
2839 /**
2840 * Adjusts the indentation of each line of this string based on the value of
2841 * {@code n}, and normalizes line termination characters.
2842 * <p>
2843 * This string is conceptually separated into lines using
2844 * {@link String#lines()}. Each line is then adjusted as described below
2845 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2846 * lines are then concatenated and returned.
2847 * <p>
2848 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2849 * beginning of each line.
2850 * <p>
2851 * If {@code n < 0} then up to {@code n}
2852 * {@link Character#isWhitespace(int) white space characters} are removed
2853 * from the beginning of each line. If a given line does not contain
2854 * sufficient white space then all leading
2855 * {@link Character#isWhitespace(int) white space characters} are removed.
2856 * Each white space character is treated as a single character. In
2857 * particular, the tab character {@code "\t"} (U+0009) is considered a
2858 * single character; it is not expanded.
2859 * <p>
2860 * If {@code n == 0} then the line remains unchanged. However, line
2861 * terminators are still normalized.
2862 *
2863 * @param n number of leading
2864 * {@link Character#isWhitespace(int) white space characters}
2865 * to add or remove
2866 *
2867 * @return string with indentation adjusted and line endings normalized
2868 *
2869 * @see String#lines()
2870 * @see String#isBlank()
2871 * @see Character#isWhitespace(int)
2872 *
2873 * @since 12
2874 */
2875 public String indent(int n) {
2876 return isEmpty() ? "" : indent(n, false);
2877 }
2878
2879 private String indent(int n, boolean removeBlanks) {
2880 Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
2881 : lines();
2882 if (n > 0) {
2883 final String spaces = " ".repeat(n);
2884 stream = stream.map(s -> spaces + s);
2885 } else if (n == Integer.MIN_VALUE) {
2886 stream = stream.map(s -> s.stripLeading());
2887 } else if (n < 0) {
2888 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
2889 }
2890 return stream.collect(Collectors.joining("\n", "", "\n"));
2891 }
2892
2893 private int indexOfNonWhitespace() {
2894 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
2895 : StringUTF16.indexOfNonWhitespace(value);
2896 }
2897
2898 private int lastIndexOfNonWhitespace() {
2899 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
2900 : StringUTF16.lastIndexOfNonWhitespace(value);
2901 }
2902
2903 /**
2904 * This method allows the application of a function to {@code this}
2905 * string. The function should expect a single String argument
2906 * and produce an {@code R} result.
2907 * <p>
2908 * Any exception thrown by {@code f()} will be propagated to the
2909 * caller.
2910 *
2911 * @param f functional interface to apply
2912 *
2913 * @param <R> class of the result
2914 *
2915 * @return the result of applying the function to this string
2916 *
2917 * @see java.util.function.Function
2918 *
2919 * @since 12
2920 */
2921 public <R> R transform(Function<? super String, ? extends R> f) {
2922 return f.apply(this);
2923 }
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.io.ObjectStreamField;
29 import java.io.UnsupportedEncodingException;
30 import java.lang.annotation.Native;
31 import java.lang.invoke.MethodHandles;
32 import java.lang.constant.Constable;
33 import java.lang.constant.ConstantDesc;
34 import java.nio.charset.Charset;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.Comparator;
38 import java.util.Formatter;
39 import java.util.List;
40 import java.util.Locale;
41 import java.util.Objects;
42 import java.util.Optional;
43 import java.util.Spliterator;
44 import java.util.StringJoiner;
45 import java.util.function.Function;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48 import java.util.regex.PatternSyntaxException;
49 import java.util.stream.Collectors;
50 import java.util.stream.IntStream;
51 import java.util.stream.Stream;
52 import java.util.stream.StreamSupport;
53 import jdk.internal.HotSpotIntrinsicCandidate;
54 import jdk.internal.vm.annotation.Stable;
55
56 import static java.util.function.Predicate.not;
57
58 /**
59 * The {@code String} class represents character strings. All
2781 return ret == null ? this : ret;
2782 }
2783
2784 /**
2785 * Returns {@code true} if the string is empty or contains only
2786 * {@link Character#isWhitespace(int) white space} codepoints,
2787 * otherwise {@code false}.
2788 *
2789 * @return {@code true} if the string is empty or contains only
2790 * {@link Character#isWhitespace(int) white space} codepoints,
2791 * otherwise {@code false}
2792 *
2793 * @see Character#isWhitespace(int)
2794 *
2795 * @since 11
2796 */
2797 public boolean isBlank() {
2798 return indexOfNonWhitespace() == length();
2799 }
2800
2801 /**
2802 * Returns a stream of lines extracted from this string,
2803 * separated by line terminators.
2804 * <p>
2805 * A <i>line terminator</i> is one of the following:
2806 * a line feed character {@code "\n"} (U+000A),
2807 * a carriage return character {@code "\r"} (U+000D),
2808 * or a carriage return followed immediately by a line feed
2809 * {@code "\r\n"} (U+000D U+000A).
2810 * <p>
2811 * A <i>line</i> is either a sequence of zero or more characters
2812 * followed by a line terminator, or it is a sequence of one or
2813 * more characters followed by the end of the string. A
2814 * line does not include the line terminator.
2815 * <p>
2816 * The stream returned by this method contains the lines from
2817 * this string in the order in which they occur.
2818 *
2819 * @apiNote This definition of <i>line</i> implies that an empty
2820 * string has zero lines and that there is no empty line
2821 * following a line terminator at the end of a string.
2822 *
2823 * @implNote This method provides better performance than
2824 * split("\R") by supplying elements lazily and
2825 * by faster search of new line terminators.
2826 *
2827 * @return the stream of lines extracted from this string
2828 *
2829 * @since 11
2830 */
2831 public Stream<String> lines() {
2832 return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
2833 }
2834
2835 /**
2836 * Adjusts the indentation of each line of this string based on the value of
2837 * {@code n}, and normalizes line termination characters.
2838 * <p>
2839 * This string is conceptually separated into lines using
2840 * {@link String#lines()}. Each line is then adjusted as described below
2841 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2842 * lines are then concatenated and returned.
2843 * <p>
2844 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2845 * beginning of each line.
2846 * <p>
2847 * If {@code n < 0} then up to {@code n}
2848 * {@link Character#isWhitespace(int) white space characters} are removed
2849 * from the beginning of each line. If a given line does not contain
2850 * sufficient white space then all leading
2851 * {@link Character#isWhitespace(int) white space characters} are removed.
2852 * Each white space character is treated as a single character. In
2853 * particular, the tab character {@code "\t"} (U+0009) is considered a
2854 * single character; it is not expanded.
2855 * <p>
2856 * If {@code n == 0} then the line remains unchanged. However, line
2857 * terminators are still normalized.
2858 *
2859 * @param n number of leading
2860 * {@link Character#isWhitespace(int) white space characters}
2861 * to add or remove
2862 *
2863 * @return string with indentation adjusted and line endings normalized
2864 *
2865 * @see String#lines()
2866 * @see String#isBlank()
2867 * @see Character#isWhitespace(int)
2868 *
2869 * @since 12
2870 */
2871 public String indent(int n) {
2872 if (isEmpty()) {
2873 return "";
2874 }
2875 Stream<String> stream = lines();
2876 if (n > 0) {
2877 final String spaces = " ".repeat(n);
2878 stream = stream.map(s -> spaces + s);
2879 } else if (n == Integer.MIN_VALUE) {
2880 stream = stream.map(s -> s.stripLeading());
2881 } else if (n < 0) {
2882 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
2883 }
2884 return stream.collect(Collectors.joining("\n", "", "\n"));
2885 }
2886
2887 private int indexOfNonWhitespace() {
2888 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
2889 : StringUTF16.indexOfNonWhitespace(value);
2890 }
2891
2892 private int lastIndexOfNonWhitespace() {
2893 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
2894 : StringUTF16.lastIndexOfNonWhitespace(value);
2895 }
2896
2897 /**
2898 * Returns a string whose value is this string, with incidental white space
2899 * removed from the beginning and end of every line.
2900 * <p>
2901 * Incidental white space is often present in a text block to align the
2902 * content with the opening delimiter. For example, in the following code,
2903 * dots represent incidental white space:
2904 * <blockquote><pre>
2905 * String html = """
2906 * ..............<html>
2907 * .............. <body>
2908 * .............. <p>Hello, world</p>
2909 * .............. </body>
2910 * ..............</html>
2911 * ..............""";
2912 * </pre></blockquote>
2913 * This method treats the incidental white space as indentation to be
2914 * stripped, producing a string that preserves the relative indentation of
2915 * the content. Using | to visualize the start of each line of the string:
2916 * <blockquote><pre>
2917 * |<html>
2918 * | <body>
2919 * | <p>Hello, world</p>
2920 * | </body>
2921 * |</html>
2922 * </pre></blockquote>
2923 * First, this string is conceptually separated into lines as if by
2924 * {@link String#lines()}.
2925 * <p>
2926 * Then, the <i>minimum indentation</i> (min) is determined as follows.
2927 * For each non-blank line (as defined by {@link String#isBlank()}), the
2928 * leading {@link Character#isWhitespace(int) white space} characters are
2929 * counted. The leading {@link Character#isWhitespace(int) white space}
2930 * characters on the last line are also counted even if blank.
2931 * The <i>min</i> value is the smallest of these counts.
2932 * <p>
2933 * For each non-blank line, <i>min</i> leading white space characters are
2934 * removed, and any trailing white space characters are removed.
2935 * <p>
2936 * Finally, the lines are joined with a line feed character {@code "\n"}
2937 * (U+000A) into a single string and returned.
2938 *
2939 * @apiNote
2940 * This method's primary purpose is to shift a block of lines as far as
2941 * possible to the left, while preserving relative indentation. Lines
2942 * that were indented the least will thus have no leading white space.
2943 *
2944 * @implNote
2945 * This method treats all white space characters as having equal weight.
2946 * As long as the indentation on every line is consistently composed
2947 * of the same character sequences, then the result will be as described
2948 * above.
2949 *
2950 * @return string with margins removed and line terminators normalized
2951 *
2952 * @see String#lines()
2953 * @see String#isBlank()
2954 * @see String#indent(int)
2955 * @see Character#isWhitespace(int)
2956 *
2957 * @since 13
2958 *
2959 * @deprecated Preview feature associated with Text Blocks.
2960 * Use at your own risk.
2961 */
2962 @Deprecated(forRemoval=true, since="13")
2963 public String stripIndent() {
2964 int length = length();
2965 if (length == 0) {
2966 return "";
2967 }
2968 char lastChar = charAt(length - 1);
2969 boolean optOut = lastChar == '\n' || lastChar == '\r';
2970 List<String> lines = lines().collect(Collectors.toList());
2971 final int outdent = optOut ? 0 : outdent(lines);
2972 return lines.stream()
2973 .map(line -> {
2974 int firstNonWhitespace = line.indexOfNonWhitespace();
2975 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
2976 return firstNonWhitespace > lastNonWhitespace
2977 ? "" : line.substring(Math.min(outdent, firstNonWhitespace), lastNonWhitespace);
2978 })
2979 .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
2980 }
2981
2982 private static int outdent(List<String> lines) {
2983 // Note: outdent is guaranteed to be zero or positive number.
2984 // If there isn't a non-blank line then the last must be blank
2985 int outdent = Integer.MAX_VALUE;
2986 for (String line : lines) {
2987 int leadingWhitespace = line.indexOfNonWhitespace();
2988 if (leadingWhitespace != line.length()) {
2989 outdent = Integer.min(outdent, leadingWhitespace);
2990 }
2991 }
2992 String lastLine = lines.get(lines.size() - 1);
2993 if (lastLine.isBlank()) {
2994 outdent = Integer.min(outdent, lastLine.length());
2995 }
2996 return outdent;
2997 }
2998
2999 /**
3000 * This method allows the application of a function to {@code this}
3001 * string. The function should expect a single String argument
3002 * and produce an {@code R} result.
3003 * <p>
3004 * Any exception thrown by {@code f()} will be propagated to the
3005 * caller.
3006 *
3007 * @param f functional interface to apply
3008 *
3009 * @param <R> class of the result
3010 *
3011 * @return the result of applying the function to this string
3012 *
3013 * @see java.util.function.Function
3014 *
3015 * @since 12
3016 */
3017 public <R> R transform(Function<? super String, ? extends R> f) {
3018 return f.apply(this);
3019 }
|