4481 if (type == UNASSIGNED)
4482 return UNKNOWN;
4483 int index = Arrays.binarySearch(scriptStarts, codePoint);
4484 if (index < 0)
4485 index = -index - 2;
4486 return scripts[index];
4487 }
4488
4489 /**
4490 * Returns the UnicodeScript constant with the given Unicode script
4491 * name or the script name alias. Script names and their aliases are
4492 * determined by The Unicode Standard. The files Scripts<version>.txt
4493 * and PropertyValueAliases<version>.txt define script names
4494 * and the script name aliases for a particular version of the
4495 * standard. The {@link Character} class specifies the version of
4496 * the standard that it supports.
4497 * <p>
4498 * Character case is ignored for all of the valid script names.
4499 * The en_US locale's case mapping rules are used to provide
4500 * case-insensitive string comparisons for script name validation.
4501 * <p>
4502 *
4503 * @param scriptName A {@code UnicodeScript} name.
4504 * @return The {@code UnicodeScript} constant identified
4505 * by {@code scriptName}
4506 * @throws IllegalArgumentException if {@code scriptName} is an
4507 * invalid name
4508 * @throws NullPointerException if {@code scriptName} is null
4509 */
4510 public static final UnicodeScript forName(String scriptName) {
4511 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4512 //.replace(' ', '_'));
4513 UnicodeScript sc = aliases.get(scriptName);
4514 if (sc != null)
4515 return sc;
4516 return valueOf(scriptName);
4517 }
4518 }
4519
4520 /**
4521 * The value of the {@code Character}.
5486 */
5487 public static boolean isUpperCase(char ch) {
5488 return isUpperCase((int)ch);
5489 }
5490
5491 /**
5492 * Determines if the specified character (Unicode code point) is an uppercase character.
5493 * <p>
5494 * A character is uppercase if its general category type, provided by
5495 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5496 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5497 * <p>
5498 * The following are examples of uppercase characters:
5499 * <blockquote><pre>
5500 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5501 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5502 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5503 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5504 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5505 * </pre></blockquote>
5506 * <p> Many other Unicode characters are uppercase too.<p>
5507 *
5508 * @param codePoint the character (Unicode code point) to be tested.
5509 * @return {@code true} if the character is uppercase;
5510 * {@code false} otherwise.
5511 * @see Character#isLowerCase(int)
5512 * @see Character#isTitleCase(int)
5513 * @see Character#toUpperCase(int)
5514 * @see Character#getType(int)
5515 * @since 1.5
5516 */
5517 public static boolean isUpperCase(int codePoint) {
5518 return getType(codePoint) == Character.UPPERCASE_LETTER ||
5519 CharacterData.of(codePoint).isOtherUppercase(codePoint);
5520 }
5521
5522 /**
5523 * Determines if the specified character is a titlecase character.
5524 * <p>
5525 * A character is a titlecase character if its general
5526 * category type, provided by {@code Character.getType(ch)},
5564 * Determines if the specified character (Unicode code point) is a titlecase character.
5565 * <p>
5566 * A character is a titlecase character if its general
5567 * category type, provided by {@link Character#getType(int) getType(codePoint)},
5568 * is {@code TITLECASE_LETTER}.
5569 * <p>
5570 * Some characters look like pairs of Latin letters. For example, there
5571 * is an uppercase letter that looks like "LJ" and has a corresponding
5572 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5573 * is the appropriate form to use when rendering a word in lowercase
5574 * with initial capitals, as for a book title.
5575 * <p>
5576 * These are some of the Unicode characters for which this method returns
5577 * {@code true}:
5578 * <ul>
5579 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5580 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5581 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5582 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5583 * </ul>
5584 * <p> Many other Unicode characters are titlecase too.<p>
5585 *
5586 * @param codePoint the character (Unicode code point) to be tested.
5587 * @return {@code true} if the character is titlecase;
5588 * {@code false} otherwise.
5589 * @see Character#isLowerCase(int)
5590 * @see Character#isUpperCase(int)
5591 * @see Character#toTitleCase(int)
5592 * @see Character#getType(int)
5593 * @since 1.5
5594 */
5595 public static boolean isTitleCase(int codePoint) {
5596 return getType(codePoint) == Character.TITLECASE_LETTER;
5597 }
5598
5599 /**
5600 * Determines if the specified character is a digit.
5601 * <p>
5602 * A character is a digit if its general category type, provided
5603 * by {@code Character.getType(ch)}, is
5604 * {@code DECIMAL_DIGIT_NUMBER}.
6760 /**
6761 * Determines if the specified character (Unicode code point) is
6762 * white space according to Java. A character is a Java
6763 * whitespace character if and only if it satisfies one of the
6764 * following criteria:
6765 * <ul>
6766 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6767 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6768 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779 * </ul>
6780 * <p>
6781 *
6782 * @param codePoint the character (Unicode code point) to be tested.
6783 * @return {@code true} if the character is a Java whitespace
6784 * character; {@code false} otherwise.
6785 * @see Character#isSpaceChar(int)
6786 * @since 1.5
6787 */
6788 public static boolean isWhitespace(int codePoint) {
6789 return CharacterData.of(codePoint).isWhitespace(codePoint);
6790 }
6791
6792 /**
6793 * Determines if the specified character is an ISO control
6794 * character. A character is considered to be an ISO control
6795 * character if its code is in the range {@code '\u005Cu0000'}
6796 * through {@code '\u005Cu001F'} or in the range
6797 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6798 *
6799 * <p><b>Note:</b> This method cannot handle <a
6800 * href="#supplementary"> supplementary characters</a>. To support
7104 * Compares two {@code char} values numerically.
7105 * The value returned is identical to what would be returned by:
7106 * <pre>
7107 * Character.valueOf(x).compareTo(Character.valueOf(y))
7108 * </pre>
7109 *
7110 * @param x the first {@code char} to compare
7111 * @param y the second {@code char} to compare
7112 * @return the value {@code 0} if {@code x == y};
7113 * a value less than {@code 0} if {@code x < y}; and
7114 * a value greater than {@code 0} if {@code x > y}
7115 * @since 1.7
7116 */
7117 public static int compare(char x, char y) {
7118 return x - y;
7119 }
7120
7121 /**
7122 * Converts the character (Unicode code point) argument to uppercase using
7123 * information from the UnicodeData file.
7124 * <p>
7125 *
7126 * @param codePoint the character (Unicode code point) to be converted.
7127 * @return either the uppercase equivalent of the character, if
7128 * any, or an error flag ({@code Character.ERROR})
7129 * that indicates that a 1:M {@code char} mapping exists.
7130 * @see Character#isLowerCase(char)
7131 * @see Character#isUpperCase(char)
7132 * @see Character#toLowerCase(char)
7133 * @see Character#toTitleCase(char)
7134 * @since 1.4
7135 */
7136 static int toUpperCaseEx(int codePoint) {
7137 assert isValidCodePoint(codePoint);
7138 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7139 }
7140
7141 /**
7142 * Converts the character (Unicode code point) argument to uppercase using case
7143 * mapping information from the SpecialCasing file in the Unicode
7144 * specification. If a character has no explicit uppercase
|
4481 if (type == UNASSIGNED)
4482 return UNKNOWN;
4483 int index = Arrays.binarySearch(scriptStarts, codePoint);
4484 if (index < 0)
4485 index = -index - 2;
4486 return scripts[index];
4487 }
4488
4489 /**
4490 * Returns the UnicodeScript constant with the given Unicode script
4491 * name or the script name alias. Script names and their aliases are
4492 * determined by The Unicode Standard. The files Scripts<version>.txt
4493 * and PropertyValueAliases<version>.txt define script names
4494 * and the script name aliases for a particular version of the
4495 * standard. The {@link Character} class specifies the version of
4496 * the standard that it supports.
4497 * <p>
4498 * Character case is ignored for all of the valid script names.
4499 * The en_US locale's case mapping rules are used to provide
4500 * case-insensitive string comparisons for script name validation.
4501 *
4502 * @param scriptName A {@code UnicodeScript} name.
4503 * @return The {@code UnicodeScript} constant identified
4504 * by {@code scriptName}
4505 * @throws IllegalArgumentException if {@code scriptName} is an
4506 * invalid name
4507 * @throws NullPointerException if {@code scriptName} is null
4508 */
4509 public static final UnicodeScript forName(String scriptName) {
4510 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4511 //.replace(' ', '_'));
4512 UnicodeScript sc = aliases.get(scriptName);
4513 if (sc != null)
4514 return sc;
4515 return valueOf(scriptName);
4516 }
4517 }
4518
4519 /**
4520 * The value of the {@code Character}.
5485 */
5486 public static boolean isUpperCase(char ch) {
5487 return isUpperCase((int)ch);
5488 }
5489
5490 /**
5491 * Determines if the specified character (Unicode code point) is an uppercase character.
5492 * <p>
5493 * A character is uppercase if its general category type, provided by
5494 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5495 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5496 * <p>
5497 * The following are examples of uppercase characters:
5498 * <blockquote><pre>
5499 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5500 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5501 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5502 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5503 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5504 * </pre></blockquote>
5505 * <p> Many other Unicode characters are uppercase too.
5506 *
5507 * @param codePoint the character (Unicode code point) to be tested.
5508 * @return {@code true} if the character is uppercase;
5509 * {@code false} otherwise.
5510 * @see Character#isLowerCase(int)
5511 * @see Character#isTitleCase(int)
5512 * @see Character#toUpperCase(int)
5513 * @see Character#getType(int)
5514 * @since 1.5
5515 */
5516 public static boolean isUpperCase(int codePoint) {
5517 return getType(codePoint) == Character.UPPERCASE_LETTER ||
5518 CharacterData.of(codePoint).isOtherUppercase(codePoint);
5519 }
5520
5521 /**
5522 * Determines if the specified character is a titlecase character.
5523 * <p>
5524 * A character is a titlecase character if its general
5525 * category type, provided by {@code Character.getType(ch)},
5563 * Determines if the specified character (Unicode code point) is a titlecase character.
5564 * <p>
5565 * A character is a titlecase character if its general
5566 * category type, provided by {@link Character#getType(int) getType(codePoint)},
5567 * is {@code TITLECASE_LETTER}.
5568 * <p>
5569 * Some characters look like pairs of Latin letters. For example, there
5570 * is an uppercase letter that looks like "LJ" and has a corresponding
5571 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5572 * is the appropriate form to use when rendering a word in lowercase
5573 * with initial capitals, as for a book title.
5574 * <p>
5575 * These are some of the Unicode characters for which this method returns
5576 * {@code true}:
5577 * <ul>
5578 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5579 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5580 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5581 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5582 * </ul>
5583 * <p> Many other Unicode characters are titlecase too.
5584 *
5585 * @param codePoint the character (Unicode code point) to be tested.
5586 * @return {@code true} if the character is titlecase;
5587 * {@code false} otherwise.
5588 * @see Character#isLowerCase(int)
5589 * @see Character#isUpperCase(int)
5590 * @see Character#toTitleCase(int)
5591 * @see Character#getType(int)
5592 * @since 1.5
5593 */
5594 public static boolean isTitleCase(int codePoint) {
5595 return getType(codePoint) == Character.TITLECASE_LETTER;
5596 }
5597
5598 /**
5599 * Determines if the specified character is a digit.
5600 * <p>
5601 * A character is a digit if its general category type, provided
5602 * by {@code Character.getType(ch)}, is
5603 * {@code DECIMAL_DIGIT_NUMBER}.
6759 /**
6760 * Determines if the specified character (Unicode code point) is
6761 * white space according to Java. A character is a Java
6762 * whitespace character if and only if it satisfies one of the
6763 * following criteria:
6764 * <ul>
6765 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6766 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6767 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
6768 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6769 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6770 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6771 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6772 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6773 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6774 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6775 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6776 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6777 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6778 * </ul>
6779 *
6780 * @param codePoint the character (Unicode code point) to be tested.
6781 * @return {@code true} if the character is a Java whitespace
6782 * character; {@code false} otherwise.
6783 * @see Character#isSpaceChar(int)
6784 * @since 1.5
6785 */
6786 public static boolean isWhitespace(int codePoint) {
6787 return CharacterData.of(codePoint).isWhitespace(codePoint);
6788 }
6789
6790 /**
6791 * Determines if the specified character is an ISO control
6792 * character. A character is considered to be an ISO control
6793 * character if its code is in the range {@code '\u005Cu0000'}
6794 * through {@code '\u005Cu001F'} or in the range
6795 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6796 *
6797 * <p><b>Note:</b> This method cannot handle <a
6798 * href="#supplementary"> supplementary characters</a>. To support
7102 * Compares two {@code char} values numerically.
7103 * The value returned is identical to what would be returned by:
7104 * <pre>
7105 * Character.valueOf(x).compareTo(Character.valueOf(y))
7106 * </pre>
7107 *
7108 * @param x the first {@code char} to compare
7109 * @param y the second {@code char} to compare
7110 * @return the value {@code 0} if {@code x == y};
7111 * a value less than {@code 0} if {@code x < y}; and
7112 * a value greater than {@code 0} if {@code x > y}
7113 * @since 1.7
7114 */
7115 public static int compare(char x, char y) {
7116 return x - y;
7117 }
7118
7119 /**
7120 * Converts the character (Unicode code point) argument to uppercase using
7121 * information from the UnicodeData file.
7122 *
7123 * @param codePoint the character (Unicode code point) to be converted.
7124 * @return either the uppercase equivalent of the character, if
7125 * any, or an error flag ({@code Character.ERROR})
7126 * that indicates that a 1:M {@code char} mapping exists.
7127 * @see Character#isLowerCase(char)
7128 * @see Character#isUpperCase(char)
7129 * @see Character#toLowerCase(char)
7130 * @see Character#toTitleCase(char)
7131 * @since 1.4
7132 */
7133 static int toUpperCaseEx(int codePoint) {
7134 assert isValidCodePoint(codePoint);
7135 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7136 }
7137
7138 /**
7139 * Converts the character (Unicode code point) argument to uppercase using case
7140 * mapping information from the SpecialCasing file in the Unicode
7141 * specification. If a character has no explicit uppercase
|