src/share/classes/java/lang/Character.java

Print this page




4481             if (type == UNASSIGNED)
4482                 return UNKNOWN;
4483             int index = Arrays.binarySearch(scriptStarts, codePoint);
4484             if (index < 0)
4485                 index = -index - 2;
4486             return scripts[index];
4487         }
4488 
4489         /**
4490          * Returns the UnicodeScript constant with the given Unicode script
4491          * name or the script name alias. Script names and their aliases are
4492          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4493          * and PropertyValueAliases&lt;version&gt;.txt define script names
4494          * and the script name aliases for a particular version of the
4495          * standard. The {@link Character} class specifies the version of
4496          * the standard that it supports.
4497          * <p>
4498          * Character case is ignored for all of the valid script names.
4499          * The en_US locale's case mapping rules are used to provide
4500          * case-insensitive string comparisons for script name validation.
4501          *
4502          *
4503          * @param scriptName A {@code UnicodeScript} name.
4504          * @return The {@code UnicodeScript} constant identified
4505          *         by {@code scriptName}
4506          * @throws IllegalArgumentException if {@code scriptName} is an
4507          *         invalid name
4508          * @throws NullPointerException if {@code scriptName} is null
4509          */
4510         public static final UnicodeScript forName(String scriptName) {
4511             scriptName = scriptName.toUpperCase(Locale.ENGLISH); // fold case so both lookups below are case-insensitive; a null argument fails here with NullPointerException
4512                                  //.replace(' ', '_'));  (disabled: spaces are NOT converted to underscores)
4513             UnicodeScript sc = aliases.get(scriptName); // first try the alias table
4514             if (sc != null)
4515                 return sc;
4516             return valueOf(scriptName); // not an alias: resolve as a constant name; per the Javadoc above an invalid name ends in IllegalArgumentException
4517         }
4518     }
4519 
4520     /**
4521      * The value of the {@code Character}.


5486      */
5487     public static boolean isUpperCase(char ch) {
5488         return isUpperCase((int)ch); // widen the char to an int and delegate to the code-point overload
5489     }
5490 
5491     /**
5492      * Determines if the specified character (Unicode code point) is an uppercase character.
5493      * <p>
5494      * A character is uppercase if its general category type, provided by
5495      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5496      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5497      * <p>
5498      * The following are examples of uppercase characters:
5499      * <blockquote><pre>
5500      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5501      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5502      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5503      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5504      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5505      * </pre></blockquote>
5506      * <p> Many other Unicode characters are uppercase too.
5507      *
5508      * @param   codePoint the character (Unicode code point) to be tested.
5509      * @return  {@code true} if the character is uppercase;
5510      *          {@code false} otherwise.
5511      * @see     Character#isLowerCase(int)
5512      * @see     Character#isTitleCase(int)
5513      * @see     Character#toUpperCase(int)
5514      * @see     Character#getType(int)
5515      * @since   1.5
5516      */
5517     public static boolean isUpperCase(int codePoint) {
5518         return getType(codePoint) == Character.UPPERCASE_LETTER || // general category says uppercase letter...
5519                CharacterData.of(codePoint).isOtherUppercase(codePoint); // ...otherwise (short-circuit ||) fall back to the Other_Uppercase property check
5520     }
5521 
5522     /**
5523      * Determines if the specified character is a titlecase character.
5524      * <p>
5525      * A character is a titlecase character if its general
5526      * category type, provided by {@code Character.getType(ch)},


5564      * Determines if the specified character (Unicode code point) is a titlecase character.
5565      * <p>
5566      * A character is a titlecase character if its general
5567      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5568      * is {@code TITLECASE_LETTER}.
5569      * <p>
5570      * Some characters look like pairs of Latin letters. For example, there
5571      * is an uppercase letter that looks like "LJ" and has a corresponding
5572      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5573      * is the appropriate form to use when rendering a word in lowercase
5574      * with initial capitals, as for a book title.
5575      * <p>
5576      * These are some of the Unicode characters for which this method returns
5577      * {@code true}:
5578      * <ul>
5579      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5580      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5581      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5583      * </ul>
5584      * <p> Many other Unicode characters are titlecase too.
5585      *
5586      * @param   codePoint the character (Unicode code point) to be tested.
5587      * @return  {@code true} if the character is titlecase;
5588      *          {@code false} otherwise.
5589      * @see     Character#isLowerCase(int)
5590      * @see     Character#isUpperCase(int)
5591      * @see     Character#toTitleCase(int)
5592      * @see     Character#getType(int)
5593      * @since   1.5
5594      */
5595     public static boolean isTitleCase(int codePoint) {
5596         return getType(codePoint) == Character.TITLECASE_LETTER; // decided solely by the general category returned by getType
5597     }
5598 
5599     /**
5600      * Determines if the specified character is a digit.
5601      * <p>
5602      * A character is a digit if its general category type, provided
5603      * by {@code Character.getType(ch)}, is
5604      * {@code DECIMAL_DIGIT_NUMBER}.


6760     /**
6761      * Determines if the specified character (Unicode code point) is
6762      * white space according to Java.  A character is a Java
6763      * whitespace character if and only if it satisfies one of the
6764      * following criteria:
6765      * <ul>
6766      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6767      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6768      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779      * </ul>
6780      *
6781      *
6782      * @param   codePoint the character (Unicode code point) to be tested.
6783      * @return  {@code true} if the character is a Java whitespace
6784      *          character; {@code false} otherwise.
6785      * @see     Character#isSpaceChar(int)
6786      * @since   1.5
6787      */
6788     public static boolean isWhitespace(int codePoint) {
6789         return CharacterData.of(codePoint).isWhitespace(codePoint); // the answer comes entirely from the CharacterData object returned by of(codePoint)
6790     }
6791 
6792     /**
6793      * Determines if the specified character is an ISO control
6794      * character.  A character is considered to be an ISO control
6795      * character if its code is in the range {@code '\u005Cu0000'}
6796      * through {@code '\u005Cu001F'} or in the range
6797      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6798      *
6799      * <p><b>Note:</b> This method cannot handle <a
6800      * href="#supplementary"> supplementary characters</a>. To support


7104      * Compares two {@code char} values numerically.
7105      * The value returned is identical to what would be returned by:
7106      * <pre>
7107      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7108      * </pre>
7109      *
7110      * @param  x the first {@code char} to compare
7111      * @param  y the second {@code char} to compare
7112      * @return the value {@code 0} if {@code x == y};
7113      *         a value less than {@code 0} if {@code x < y}; and
7114      *         a value greater than {@code 0} if {@code x > y}
7115      * @since 1.7
7116      */
7117     public static int compare(char x, char y) {
7118         return x - y; // both chars are promoted to int before subtracting, so the difference of two 16-bit values cannot overflow
7119     }
7120 
7121     /**
7122      * Converts the character (Unicode code point) argument to uppercase using
7123      * information from the UnicodeData file.
7124      *
7125      *
7126      * @param   codePoint   the character (Unicode code point) to be converted.
7127      * @return  either the uppercase equivalent of the character, if
7128      *          any, or an error flag ({@code Character.ERROR})
7129      *          that indicates that a 1:M {@code char} mapping exists.
7130      * @see     Character#isLowerCase(char)
7131      * @see     Character#isUpperCase(char)
7132      * @see     Character#toLowerCase(char)
7133      * @see     Character#toTitleCase(char)
7134      * @since 1.4
7135      */
7136     static int toUpperCaseEx(int codePoint) {
7137         assert isValidCodePoint(codePoint); // precondition; evaluated only when assertions are enabled
7138         return CharacterData.of(codePoint).toUpperCaseEx(codePoint); // per the Javadoc above, the result may be Character.ERROR when a 1:M char mapping exists
7139     }
7140 
7141     /**
7142      * Converts the character (Unicode code point) argument to uppercase using case
7143      * mapping information from the SpecialCasing file in the Unicode
7144      * specification. If a character has no explicit uppercase




4481             if (type == UNASSIGNED)
4482                 return UNKNOWN;
4483             int index = Arrays.binarySearch(scriptStarts, codePoint);
4484             if (index < 0)
4485                 index = -index - 2;
4486             return scripts[index];
4487         }
4488 
4489         /**
4490          * Returns the UnicodeScript constant with the given Unicode script
4491          * name or the script name alias. Script names and their aliases are
4492          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4493          * and PropertyValueAliases&lt;version&gt;.txt define script names
4494          * and the script name aliases for a particular version of the
4495          * standard. The {@link Character} class specifies the version of
4496          * the standard that it supports.
4497          * <p>
4498          * Character case is ignored for all of the valid script names.
4499          * The en_US locale's case mapping rules are used to provide
4500          * case-insensitive string comparisons for script name validation.

4501          *
4502          * @param scriptName A {@code UnicodeScript} name.
4503          * @return The {@code UnicodeScript} constant identified
4504          *         by {@code scriptName}
4505          * @throws IllegalArgumentException if {@code scriptName} is an
4506          *         invalid name
4507          * @throws NullPointerException if {@code scriptName} is null
4508          */
4509         public static final UnicodeScript forName(String scriptName) {
4510             scriptName = scriptName.toUpperCase(Locale.ENGLISH); // fold case so both lookups below are case-insensitive; a null argument fails here with NullPointerException
4511                                  //.replace(' ', '_'));  (disabled: spaces are NOT converted to underscores)
4512             UnicodeScript sc = aliases.get(scriptName); // first try the alias table
4513             if (sc != null)
4514                 return sc;
4515             return valueOf(scriptName); // not an alias: resolve as a constant name; per the Javadoc above an invalid name ends in IllegalArgumentException
4516         }
4517     }
4518 
4519     /**
4520      * The value of the {@code Character}.


5485      */
5486     public static boolean isUpperCase(char ch) {
5487         return isUpperCase((int)ch); // widen the char to an int and delegate to the code-point overload
5488     }
5489 
5490     /**
5491      * Determines if the specified character (Unicode code point) is an uppercase character.
5492      * <p>
5493      * A character is uppercase if its general category type, provided by
5494      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5495      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5496      * <p>
5497      * The following are examples of uppercase characters:
5498      * <blockquote><pre>
5499      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5500      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5501      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5502      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5503      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5504      * </pre></blockquote>
5505      * <p> Many other Unicode characters are uppercase too.
5506      *
5507      * @param   codePoint the character (Unicode code point) to be tested.
5508      * @return  {@code true} if the character is uppercase;
5509      *          {@code false} otherwise.
5510      * @see     Character#isLowerCase(int)
5511      * @see     Character#isTitleCase(int)
5512      * @see     Character#toUpperCase(int)
5513      * @see     Character#getType(int)
5514      * @since   1.5
5515      */
5516     public static boolean isUpperCase(int codePoint) {
5517         return getType(codePoint) == Character.UPPERCASE_LETTER || // general category says uppercase letter...
5518                CharacterData.of(codePoint).isOtherUppercase(codePoint); // ...otherwise (short-circuit ||) fall back to the Other_Uppercase property check
5519     }
5520 
5521     /**
5522      * Determines if the specified character is a titlecase character.
5523      * <p>
5524      * A character is a titlecase character if its general
5525      * category type, provided by {@code Character.getType(ch)},


5563      * Determines if the specified character (Unicode code point) is a titlecase character.
5564      * <p>
5565      * A character is a titlecase character if its general
5566      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5567      * is {@code TITLECASE_LETTER}.
5568      * <p>
5569      * Some characters look like pairs of Latin letters. For example, there
5570      * is an uppercase letter that looks like "LJ" and has a corresponding
5571      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5572      * is the appropriate form to use when rendering a word in lowercase
5573      * with initial capitals, as for a book title.
5574      * <p>
5575      * These are some of the Unicode characters for which this method returns
5576      * {@code true}:
5577      * <ul>
5578      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5579      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5580      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5581      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5582      * </ul>
5583      * <p> Many other Unicode characters are titlecase too.
5584      *
5585      * @param   codePoint the character (Unicode code point) to be tested.
5586      * @return  {@code true} if the character is titlecase;
5587      *          {@code false} otherwise.
5588      * @see     Character#isLowerCase(int)
5589      * @see     Character#isUpperCase(int)
5590      * @see     Character#toTitleCase(int)
5591      * @see     Character#getType(int)
5592      * @since   1.5
5593      */
5594     public static boolean isTitleCase(int codePoint) {
5595         return getType(codePoint) == Character.TITLECASE_LETTER; // decided solely by the general category returned by getType
5596     }
5597 
5598     /**
5599      * Determines if the specified character is a digit.
5600      * <p>
5601      * A character is a digit if its general category type, provided
5602      * by {@code Character.getType(ch)}, is
5603      * {@code DECIMAL_DIGIT_NUMBER}.


6759     /**
6760      * Determines if the specified character (Unicode code point) is
6761      * white space according to Java.  A character is a Java
6762      * whitespace character if and only if it satisfies one of the
6763      * following criteria:
6764      * <ul>
6765      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6766      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6767      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6768      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6769      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6770      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6771      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6772      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6773      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6774      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6775      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6776      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6777      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6778      * </ul>

6779      *
6780      * @param   codePoint the character (Unicode code point) to be tested.
6781      * @return  {@code true} if the character is a Java whitespace
6782      *          character; {@code false} otherwise.
6783      * @see     Character#isSpaceChar(int)
6784      * @since   1.5
6785      */
6786     public static boolean isWhitespace(int codePoint) {
6787         return CharacterData.of(codePoint).isWhitespace(codePoint); // the answer comes entirely from the CharacterData object returned by of(codePoint)
6788     }
6789 
6790     /**
6791      * Determines if the specified character is an ISO control
6792      * character.  A character is considered to be an ISO control
6793      * character if its code is in the range {@code '\u005Cu0000'}
6794      * through {@code '\u005Cu001F'} or in the range
6795      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6796      *
6797      * <p><b>Note:</b> This method cannot handle <a
6798      * href="#supplementary"> supplementary characters</a>. To support


7102      * Compares two {@code char} values numerically.
7103      * The value returned is identical to what would be returned by:
7104      * <pre>
7105      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7106      * </pre>
7107      *
7108      * @param  x the first {@code char} to compare
7109      * @param  y the second {@code char} to compare
7110      * @return the value {@code 0} if {@code x == y};
7111      *         a value less than {@code 0} if {@code x < y}; and
7112      *         a value greater than {@code 0} if {@code x > y}
7113      * @since 1.7
7114      */
7115     public static int compare(char x, char y) {
7116         return x - y; // both chars are promoted to int before subtracting, so the difference of two 16-bit values cannot overflow
7117     }
7118 
7119     /**
7120      * Converts the character (Unicode code point) argument to uppercase using
7121      * information from the UnicodeData file.

7122      *
7123      * @param   codePoint   the character (Unicode code point) to be converted.
7124      * @return  either the uppercase equivalent of the character, if
7125      *          any, or an error flag ({@code Character.ERROR})
7126      *          that indicates that a 1:M {@code char} mapping exists.
7127      * @see     Character#isLowerCase(char)
7128      * @see     Character#isUpperCase(char)
7129      * @see     Character#toLowerCase(char)
7130      * @see     Character#toTitleCase(char)
7131      * @since 1.4
7132      */
7133     static int toUpperCaseEx(int codePoint) {
7134         assert isValidCodePoint(codePoint); // precondition; evaluated only when assertions are enabled
7135         return CharacterData.of(codePoint).toUpperCaseEx(codePoint); // per the Javadoc above, the result may be Character.ERROR when a 1:M char mapping exists
7136     }
7137 
7138     /**
7139      * Converts the character (Unicode code point) argument to uppercase using case
7140      * mapping information from the SpecialCasing file in the Unicode
7141      * specification. If a character has no explicit uppercase