src/share/classes/java/lang/String.java

Print this page
rev 6546 : 7197183: Alternate implementation of String.subSequence which uses shared backing array.
Reviewed-by: duke


   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 

  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.nio.charset.Charset;
  31 import java.util.ArrayList;
  32 import java.util.Arrays;
  33 import java.util.Comparator;
  34 import java.util.Formatter;
  35 import java.util.Locale;
  36 import java.util.Objects;
  37 import java.util.regex.Matcher;
  38 import java.util.regex.Pattern;
  39 import java.util.regex.PatternSyntaxException;
  40 
  41 /**
  42  * The {@code String} class represents character strings. All
  43  * string literals in Java programs, such as {@code "abc"}, are
  44  * implemented as instances of this class.
  45  * <p>
  46  * Strings are constant; their values cannot be changed after they
  47  * are created. String buffers support mutable strings.


  93  * Index values refer to {@code char} code units, so a supplementary
  94  * character uses two positions in a {@code String}.
  95  * <p>The {@code String} class provides methods for dealing with
  96  * Unicode code points (i.e., characters), in addition to those for
  97  * dealing with Unicode code units (i.e., {@code char} values).
  98  *
  99  * @author  Lee Boynton
 100  * @author  Arthur van Hoff
 101  * @author  Martin Buchholz
 102  * @author  Ulf Zibis
 103  * @see     java.lang.Object#toString()
 104  * @see     java.lang.StringBuffer
 105  * @see     java.lang.StringBuilder
 106  * @see     java.nio.charset.Charset
 107  * @since   JDK1.0
 108  */
 109 
 110 public final class String
 111     implements java.io.Serializable, Comparable<String>, CharSequence {
 112     /** The value is used for character storage. */
 113     private final char value[];
 114 
 115     /** Cache the hash code for the string */
 116     private int hash; // Default to 0
 117 
 118     /** use serialVersionUID from JDK 1.0.2 for interoperability */
 119     private static final long serialVersionUID = -6849794470754667710L;
 120 
 121     /**
 122      * Class String is special cased within the Serialization Stream Protocol.
 123      *
 124      * A String instance is written initially into an ObjectOutputStream in the
 125      * following format:
 126      * <pre>
 127      *      {@code TC_STRING} (utf String)
 128      * </pre>
 129      * The String is written by method {@code DataOutput.writeUTF}.
 130      * A new handle is generated to  refer to all future references to the
 131      * string instance within the stream.
 132      */
 133     private static final ObjectStreamField[] serialPersistentFields =


 948      */
 949     public byte[] getBytes() {
 950         return StringCoding.encode(value, 0, value.length);
 951     }
 952 
 953     /**
 954      * Compares this string to the specified object.  The result is {@code
 955      * true} if and only if the argument is not {@code null} and is a {@code
 956      * String} object that represents the same sequence of characters as this
 957      * object.
 958      *
 959      * @param  anObject
 960      *         The object to compare this {@code String} against
 961      *
 962      * @return  {@code true} if the given object represents a {@code String}
 963      *          equivalent to this string, {@code false} otherwise
 964      *
 965      * @see  #compareTo(String)
 966      * @see  #equalsIgnoreCase(String)
 967      */

 968     public boolean equals(Object anObject) {
 969         if (this == anObject) {
 970             return true;
 971         }
 972         if (anObject instanceof String) {
 973             String anotherString = (String) anObject;
 974             int n = value.length;
 975             if (n == anotherString.value.length) {
 976                 char v1[] = value;
 977                 char v2[] = anotherString.value;
 978                 int i = 0;
 979                 while (n-- != 0) {
 980                     if (v1[i] != v2[i])
 981                             return false;
 982                     i++;
 983                 }
 984                 return true;
 985             }



 986         }
 987         return false;
 988     }
 989 
 990     /**
 991      * Compares this string to the specified {@code StringBuffer}.  The result
 992      * is {@code true} if and only if this {@code String} represents the same
 993      * sequence of characters as the specified {@code StringBuffer}. This method
 994      * synchronizes on the {@code StringBuffer}.
 995      *
 996      * @param  sb
 997      *         The {@code StringBuffer} to compare this {@code String} against
 998      *
 999      * @return  {@code true} if this {@code String} represents the same
1000      *          sequence of characters as the specified {@code StringBuffer},
1001      *          {@code false} otherwise
1002      *
1003      * @since  1.4
1004      */
1005     public boolean contentEquals(StringBuffer sb) {


1142      */
1143     public int compareTo(String anotherString) {
1144         int len1 = value.length;
1145         int len2 = anotherString.value.length;
1146         int lim = Math.min(len1, len2);
1147         char v1[] = value;
1148         char v2[] = anotherString.value;
1149 
1150         int k = 0;
1151         while (k < lim) {
1152             char c1 = v1[k];
1153             char c2 = v2[k];
1154             if (c1 != c2) {
1155                 return c1 - c2;
1156             }
1157             k++;
1158         }
1159         return len1 - len2;
1160     }
1161 












1162     /**
1163      * A Comparator that orders {@code String} objects as by
1164      * {@code compareToIgnoreCase}. This comparator is serializable.
1165      * <p>
1166      * Note that this Comparator does <em>not</em> take locale into account,
1167      * and will result in an unsatisfactory ordering for certain locales.
1168      * The java.text package provides <em>Collators</em> to allow
1169      * locale-sensitive ordering.
1170      *
1171      * @see     java.text.Collator#compare(String, String)
1172      * @since   1.2
1173      */
1174     public static final Comparator<String> CASE_INSENSITIVE_ORDER
1175                                          = new CaseInsensitiveComparator();
1176     private static class CaseInsensitiveComparator
1177             implements Comparator<String>, java.io.Serializable {
1178         // use serialVersionUID from JDK 1.2.2 for interoperability
1179         private static final long serialVersionUID = 8575799808933029326L;
1180 
1181         public int compare(String s1, String s2) {


1945      *             {@code endIndex}.
1946      */
1947     public String substring(int beginIndex, int endIndex) {
1948         if (beginIndex < 0) {
1949             throw new StringIndexOutOfBoundsException(beginIndex);
1950         }
1951         if (endIndex > value.length) {
1952             throw new StringIndexOutOfBoundsException(endIndex);
1953         }
1954         int subLen = endIndex - beginIndex;
1955         if (subLen < 0) {
1956             throw new StringIndexOutOfBoundsException(subLen);
1957         }
1958         return ((beginIndex == 0) && (endIndex == value.length)) ? this
1959                 : new String(value, beginIndex, subLen);
1960     }
1961 
1962     /**
1963      * Returns a new character sequence that is a subsequence of this sequence.
1964      *
1965      * <p> An invocation of this method of the form
1966      *
1967      * <blockquote><pre>
1968      * str.subSequence(begin,&nbsp;end)</pre></blockquote>
1969      *
1970      * behaves in exactly the same way as the invocation
1971      *
1972      * <blockquote><pre>
1973      * str.substring(begin,&nbsp;end)</pre></blockquote>
1974      *
1975      * This method is defined so that the {@code String} class can implement
1976      * the {@link CharSequence} interface. </p>
1977      *
1978      * @param   beginIndex   the begin index, inclusive.
1979      * @param   endIndex     the end index, exclusive.
1980      * @return  the specified subsequence.
1981      *
1982      * @throws  IndexOutOfBoundsException
1983      *          if {@code beginIndex} or {@code endIndex} is negative,
1984      *          if {@code endIndex} is greater than {@code length()},
1985      *          or if {@code beginIndex} is greater than {@code endIndex}
1986      *
1987      * @since 1.4
1988      * @spec JSR-51
1989      */

1990     public CharSequence subSequence(int beginIndex, int endIndex) {
1991         return this.substring(beginIndex, endIndex);









































































































































































































1992     }
1993 
1994     /**
1995      * Concatenates the specified string to the end of this string.
1996      * <p>
1997      * If the length of the argument string is {@code 0}, then this
1998      * {@code String} object is returned. Otherwise, a new
1999      * {@code String} object is created, representing a character
2000      * sequence that is the concatenation of the character sequence
2001      * represented by this {@code String} object and the character
2002      * sequence represented by the argument string.<p>
2003      * Examples:
2004      * <blockquote><pre>
2005      * "cares".concat("s") returns "caress"
2006      * "to".concat("get").concat("her") returns "together"
2007      * </pre></blockquote>
2008      *
2009      * @param   str   the {@code String} that is concatenated to the end
2010      *                of this {@code String}.
2011      * @return  a string that represents the concatenation of this object's




   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamException;
  29 import java.io.ObjectStreamField;
  30 import java.io.UnsupportedEncodingException;
  31 import java.nio.charset.Charset;
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.Comparator;
  35 import java.util.Formatter;
  36 import java.util.Locale;
  37 import java.util.Objects;
  38 import java.util.regex.Matcher;
  39 import java.util.regex.Pattern;
  40 import java.util.regex.PatternSyntaxException;
  41 
  42 /**
  43  * The {@code String} class represents character strings. All
  44  * string literals in Java programs, such as {@code "abc"}, are
  45  * implemented as instances of this class.
  46  * <p>
  47  * Strings are constant; their values cannot be changed after they
  48  * are created. String buffers support mutable strings.


  94  * Index values refer to {@code char} code units, so a supplementary
  95  * character uses two positions in a {@code String}.
  96  * <p>The {@code String} class provides methods for dealing with
  97  * Unicode code points (i.e., characters), in addition to those for
  98  * dealing with Unicode code units (i.e., {@code char} values).
  99  *
 100  * @author  Lee Boynton
 101  * @author  Arthur van Hoff
 102  * @author  Martin Buchholz
 103  * @author  Ulf Zibis
 104  * @see     java.lang.Object#toString()
 105  * @see     java.lang.StringBuffer
 106  * @see     java.lang.StringBuilder
 107  * @see     java.nio.charset.Charset
 108  * @since   JDK1.0
 109  */
 110 
 111 public final class String
 112     implements java.io.Serializable, Comparable<String>, CharSequence {
 113     /** The value is used for character storage. */
 114     final char value[];
 115 
 116     /** Cache the hash code for the string */
 117     private int hash; // Default to 0
 118 
 119     /** use serialVersionUID from JDK 1.0.2 for interoperability */
 120     private static final long serialVersionUID = -6849794470754667710L;
 121 
 122     /**
 123      * Class String is special cased within the Serialization Stream Protocol.
 124      *
 125      * A String instance is written initially into an ObjectOutputStream in the
 126      * following format:
 127      * <pre>
 128      *      {@code TC_STRING} (utf String)
 129      * </pre>
 130      * The String is written by method {@code DataOutput.writeUTF}.
 131      * A new handle is generated to  refer to all future references to the
 132      * string instance within the stream.
 133      */
 134     private static final ObjectStreamField[] serialPersistentFields =


 949      */
 950     public byte[] getBytes() {
 951         return StringCoding.encode(value, 0, value.length);
 952     }
 953 
 954     /**
 955      * Compares this string to the specified object.  The result is {@code
 956      * true} if and only if the argument is not {@code null} and is a {@code
 957      * String} object that represents the same sequence of characters as this
 958      * object.
 959      *
 960      * @param  anObject
 961      *         The object to compare this {@code String} against
 962      *
 963      * @return  {@code true} if the given object represents a {@code String}
 964      *          equivalent to this string, {@code false} otherwise
 965      *
 966      * @see  #compareTo(String)
 967      * @see  #equalsIgnoreCase(String)
 968      */
 969     @Override
 970     public boolean equals(Object anObject) {
 971         if (this == anObject) {
 972             return true;
 973         }
 974         if (anObject instanceof String) {
 975             String anotherString = (String) anObject;
 976             int n = value.length;
 977             if (n == anotherString.value.length) {
 978                 char v1[] = value;
 979                 char v2[] = anotherString.value;
 980                 int i = 0;
 981                 while (n-- != 0) {
 982                     if (v1[i] != v2[i])
 983                             return false;
 984                     i++;
 985                 }
 986                 return true;
 987             }
 988         } else if (anObject instanceof SubSequence) {
 989             // turn the tables to keep this method smaller.
 990             return anObject.equals(this);
 991         }
 992         return false;
 993     }
 994 
 995     /**
 996      * Compares this string to the specified {@code StringBuffer}.  The result
 997      * is {@code true} if and only if this {@code String} represents the same
 998      * sequence of characters as the specified {@code StringBuffer}. This method
 999      * synchronizes on the {@code StringBuffer}.
1000      *
1001      * @param  sb
1002      *         The {@code StringBuffer} to compare this {@code String} against
1003      *
1004      * @return  {@code true} if this {@code String} represents the same
1005      *          sequence of characters as the specified {@code StringBuffer},
1006      *          {@code false} otherwise
1007      *
1008      * @since  1.4
1009      */
1010     public boolean contentEquals(StringBuffer sb) {


1147      */
1148     public int compareTo(String anotherString) {
1149         int len1 = value.length;
1150         int len2 = anotherString.value.length;
1151         int lim = Math.min(len1, len2);
1152         char v1[] = value;
1153         char v2[] = anotherString.value;
1154 
1155         int k = 0;
1156         while (k < lim) {
1157             char c1 = v1[k];
1158             char c2 = v2[k];
1159             if (c1 != c2) {
1160                 return c1 - c2;
1161             }
1162             k++;
1163         }
1164         return len1 - len2;
1165     }
1166 
1167 // This is the method we want instead of the default bridge.
1168 //    public int compareTo(Object other) {
1169 //        if(other instanceof String) {
1170 //            return compareTo((String) other);
1171 //        } else if (other instanceof SubSequence) {
1172 //            // delegate to keep this method small.
1173 //            return - ((SubSequence) other).compareTo(this);
1174 //        } else {
1175 //            throw new ClassCastException();
1176 //        }
1177 //    }
1178 //
1179     /**
1180      * A Comparator that orders {@code String} objects as by
1181      * {@code compareToIgnoreCase}. This comparator is serializable.
1182      * <p>
1183      * Note that this Comparator does <em>not</em> take locale into account,
1184      * and will result in an unsatisfactory ordering for certain locales.
1185      * The java.text package provides <em>Collators</em> to allow
1186      * locale-sensitive ordering.
1187      *
1188      * @see     java.text.Collator#compare(String, String)
1189      * @since   1.2
1190      */
1191     public static final Comparator<String> CASE_INSENSITIVE_ORDER
1192                                          = new CaseInsensitiveComparator();
1193     private static class CaseInsensitiveComparator
1194             implements Comparator<String>, java.io.Serializable {
1195         // use serialVersionUID from JDK 1.2.2 for interoperability
1196         private static final long serialVersionUID = 8575799808933029326L;
1197 
1198         public int compare(String s1, String s2) {


1962      *             {@code endIndex}.
1963      */
1964     public String substring(int beginIndex, int endIndex) {
1965         if (beginIndex < 0) {
1966             throw new StringIndexOutOfBoundsException(beginIndex);
1967         }
1968         if (endIndex > value.length) {
1969             throw new StringIndexOutOfBoundsException(endIndex);
1970         }
1971         int subLen = endIndex - beginIndex;
1972         if (subLen < 0) {
1973             throw new StringIndexOutOfBoundsException(subLen);
1974         }
1975         return ((beginIndex == 0) && (endIndex == value.length)) ? this
1976                 : new String(value, beginIndex, subLen);
1977     }
1978 
1979     /**
1980      * Returns a new character sequence that is a subsequence of this sequence.
1981      *
1982      * @implNote The character sequence refers to the original String.











1983      *
1984      * @param   beginIndex   the begin index, inclusive.
1985      * @param   endIndex     the end index, exclusive.
1986      * @return  the specified subsequence.
1987      *
1988      * @throws  IndexOutOfBoundsException
1989      *          if {@code beginIndex} or {@code endIndex} is negative,
1990      *          if {@code endIndex} is greater than {@code length()},
1991      *          or if {@code beginIndex} is greater than {@code endIndex}
1992      *
1993      * @since 1.4
1994      * @spec JSR-51
1995      */
1996     @Override
1997     public CharSequence subSequence(int beginIndex, int endIndex) {
1998         if (beginIndex < 0) {
1999             throw new StringIndexOutOfBoundsException(beginIndex);
2000         }
2001         if (endIndex > value.length) {
2002             throw new StringIndexOutOfBoundsException(endIndex);
2003         }
2004         int subLen = endIndex - beginIndex;
2005         if (subLen < 0) {
2006             throw new StringIndexOutOfBoundsException(subLen);
2007         }
2008 
2009         return (subLen == value.length)
2010                 ? this
2011                 : new SubSequence(this, beginIndex, subLen);
2012     }
2013 
2014     /**
2015      * A CharSequence implemented as a sub-sequence of a String.
2016      */
2017     private final static class SubSequence implements
2018         java.io.Serializable, CharSequence, Comparable<CharSequence> {
2019 
2020         /**
2021          *  The String of which we are a sub-sequence.
2022          */
2023         private final String source;
2024 
2025         /**
2026          * The offset within the String of our first character.
2027          */
2028         private final int offset;
2029 
2030         /**
2031          * The number of characters in this sub-sequence.
2032          */
2033         private final int count;
2034 
2035         /**
2036          * Cached hash code value.
2037          */
2038         private int hashCache = 0;
2039 
2040         /**
2041          * Construct a new sub-sequence.
2042          *
2043          * @implNote Input values are not validated.
2044          *
2045          * @param source The String of which we are a sub-sequence.
2046          * @param offset The offset within the String of our first character.
2047          * @param count The number of characters in this sub-sequence.
2048          */
2049         SubSequence(String source, int offset, int count) {
2050             this.source = source;
2051             this.offset = offset;
2052             this.count = count;
2053         }
2054 
2055         @Override
2056         public boolean equals(Object other) {
2057             if (other == this) {
2058                 // it's me!
2059                 return true;
2060             }
2061 
2062             final char[] val1 = source.value;
2063             int offset1 = offset;
2064             final char[] val2;
2065             int offset2;
2066             int each;
2067             if (other instanceof SubSequence) {
2068                 SubSequence likeMe = (SubSequence)other;
2069                 val2 = likeMe.source.value;
2070                 offset2 = likeMe.offset;
2071                 each = likeMe.count;
2072             } else if (other instanceof String) {
2073                 String similar = (String)other;
2074                 val2 = similar.value;
2075                 offset2 = 0;
2076                 each = similar.value.length;
2077             } else {
2078                 // not of recognized type.
2079                 return false;
2080             }
2081 
2082             if (each != count) {
2083                 // not the same length
2084                 return false;
2085             }
2086 
2087             offset1 += each;
2088             offset2 += each;
2089             while (--each >= 0) {
2090                 if (val1[--offset1] != val2[--offset2]) {
2091                     // unequal char
2092                     return false;
2093                 }
2094             }
2095 
2096             // chars were all equal.
2097             return true;
2098         }
2099 
2100         /**
2101          * Return the hash code value for this object.
2102          *
2103          * @implSpec The hash code of a SubSequence is the same as that of a
2104          * String containing the same characters.
2105          *
2106          * @return a hash code value for this object.
2107          */
2108         @Override
2109         public int hashCode() {
2110             int h = hashCache;
2111             if (h == 0 && count > 0) {
2112                 char val[] = source.value; // avoid getfield opcode
2113                 for (int i = 0; i < count; i++) {
2114                     h = 31 * h + val[offset + i];
2115                 }
2116 
2117                 // harmless data race updating hashCache.
2118                 hashCache = h;
2119             }
2120 
2121             return h;
2122         }
2123 
2124         @Override
2125         public String toString() {
2126             return new String(source.value, offset, count);
2127         }
2128 
2129         public boolean isEmpty() {
2130             return count == 0;
2131         }
2132 
2133 
2134         @Override
2135         public int compareTo(CharSequence other) {
2136             int otherLen = other.length();
2137             for(int each=0; each < count; each++) {
2138                 if(each >= otherLen) {
2139                     return 1;
2140                 }
2141 
2142                 int diff = other.charAt(each) - source.value[offset+each];
2143 
2144                 if(0 == diff) {
2145                     continue;
2146                 }
2147 
2148                 return diff;
2149             }
2150 
2151             return (otherLen > count) ? -1 : 0;
2152         }
2153 
2154         @Override
2155         public int length() {
2156             return count;
2157         }
2158 
2159         @Override
2160         public char charAt(int index) {
2161             if(index < 0 || index >= count) {
2162                 throw new IndexOutOfBoundsException();
2163             }
2164             return source.value[offset+index];
2165         }
2166 
2167         @Override
2168         public CharSequence subSequence(int start, int end) {
2169             int len = end - start;
2170             if (start < 0 ||
2171                 end < start ||
2172                 len > count) {
2173                 throw new IndexOutOfBoundsException();
2174             }
2175 
2176             if (0 == len) {
2177                 // it's empty.
2178                 return new String();
2179             }
2180 
2181             if(start == 0 && len == count) {
2182                 // exactly the same sequence.
2183                 return this;
2184             }
2185 
2186             // create an even smaller sub-sequence
2187             return new SubSequence(source, offset+start, len);
2188         }
2189 
2190         /**
2191          * {@inheritDoc}
2192          *
2193          * @implNote We replace this sub-sequence with a String containing the
2194          * same sequence of characters.
2195          */
2196         public Object writeReplace() throws ObjectStreamException {
2197             // It's better to just replace with string.
2198             return toString();
2199         }
2200     }
2201 
2202     /**
2203      * Concatenates the specified string to the end of this string.
2204      * <p>
2205      * If the length of the argument string is {@code 0}, then this
2206      * {@code String} object is returned. Otherwise, a new
2207      * {@code String} object is created, representing a character
2208      * sequence that is the concatenation of the character sequence
2209      * represented by this {@code String} object and the character
2210      * sequence represented by the argument string.<p>
2211      * Examples:
2212      * <blockquote><pre>
2213      * "cares".concat("s") returns "caress"
2214      * "to".concat("get").concat("her") returns "together"
2215      * </pre></blockquote>
2216      *
2217      * @param   str   the {@code String} that is concatenated to the end
2218      *                of this {@code String}.
2219      * @return  a string that represents the concatenation of this object's