* A character is lowercase if its general category type, provided
* by {@code Character.getType(ch)}, is
- * {@code LOWERCASE_LETTER}.
+ * {@code LOWERCASE_LETTER}, or it has contributory property
+ * Other_Lowercase as defined by the Unicode Standard.
*
@@ -5235,7 +5236,8 @@
*
* A character is lowercase if its general category type, provided
* by {@link Character#getType getType(codePoint)}, is
- * {@code LOWERCASE_LETTER}.
+ * {@code LOWERCASE_LETTER}, or it has contributory property
+ * Other_Lowercase as defined by the Unicode Standard.
*
* The following are examples of lowercase characters:
*
@@ -5257,7 +5259,8 @@
* @since 1.5
*/
public static boolean isLowerCase(int codePoint) {
- return getType(codePoint) == Character.LOWERCASE_LETTER;
+ return getType(codePoint) == Character.LOWERCASE_LETTER ||
+ CharacterData.of(codePoint).isOtherLowercase(codePoint);
}
/**
@@ -5265,6 +5268,7 @@
*
* A character is uppercase if its general category type, provided by
- * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
+ * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
+ * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
*
* The following are examples of uppercase characters:
*
@@ -5298,7 +5302,8 @@
* Determines if the specified character (Unicode code point) is an uppercase character.
*
* A character is uppercase if its general category type, provided by
- * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}.
+ * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
+ * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
*
* The following are examples of uppercase characters:
*
@@ -5320,7 +5325,8 @@
* @since 1.5
*/
public static boolean isUpperCase(int codePoint) {
- return getType(codePoint) == Character.UPPERCASE_LETTER;
+ return getType(codePoint) == Character.UPPERCASE_LETTER ||
+ CharacterData.of(codePoint).isOtherUppercase(codePoint);
}
/**
@@ -5725,6 +5731,52 @@
}
/**
+ * Determines if the specified character (Unicode code point) is alphabetic.
+ *
+ * A character is considered to be alphabetic if its general category type,
+ * provided by {@link Character#getType(int) getType(codePoint)}, is any of
+ * the following:
+ * <ul>
+ * <li> {@code UPPERCASE_LETTER}
+ * <li> {@code LOWERCASE_LETTER}
+ * <li> {@code TITLECASE_LETTER}
+ * <li> {@code MODIFIER_LETTER}
+ * <li> {@code OTHER_LETTER}
+ * <li> {@code LETTER_NUMBER}
+ * </ul>
+ * or it has contributory property Other_Alphabetic as defined by the
+ * Unicode Standard.
+ *
+ * @param codePoint the character (Unicode code point) to be tested.
+ * @return {@code true} if the character is a Unicode alphabetic
+ * character, {@code false} otherwise.
+ * @since 1.7
+ */
+ public static boolean isAlphabetic(int codePoint) {
+ return (((((1 << Character.UPPERCASE_LETTER) |
+ (1 << Character.LOWERCASE_LETTER) |
+ (1 << Character.TITLECASE_LETTER) |
+ (1 << Character.MODIFIER_LETTER) |
+ (1 << Character.OTHER_LETTER) |
+ (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
+ CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
+ }
+
+ /**
+ * Determines if the specified character (Unicode code point) is a CJKV
+ * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
+ * the Unicode Standard.
+ *
+ * @param codePoint the character (Unicode code point) to be tested.
+ * @return {@code true} if the character is a Unicode ideograph
+ * character, {@code false} otherwise.
+ * @since 1.7
+ */
+ public static boolean isIdeographic(int codePoint) {
+ return CharacterData.of(codePoint).isIdeographic(codePoint);
+ }
+
+ /**
* Determines if the specified character is
* permissible as the first character in a Java identifier.
*
@@ -6430,7 +6482,7 @@
/**
* Determines if the specified character is a Unicode space character.
* A character is considered to be a space character if and only if
- * it is specified to be a space character by the Unicode standard. This
+ * it is specified to be a space character by the Unicode Standard. This
* method returns true if the character's general category type is any of
* the following:
*
@@ -6458,7 +6510,7 @@
* Determines if the specified character (Unicode code point) is a
* Unicode space character. A character is considered to be a
* space character if and only if it is specified to be a space
- * character by the Unicode standard. This method returns true if
+ * character by the Unicode Standard. This method returns true if
* the character's general category type is any of the following:
*
*
@@ -6908,7 +6960,7 @@
* @since 1.4
*/
static char[] toUpperCaseCharArray(int codePoint) {
- // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
+ // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
assert isBmpCodePoint(codePoint);
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
}
@@ -6941,7 +6993,7 @@
* Note: if the specified character is not assigned a name by
* the UnicodeData file (part of the Unicode Character
* Database maintained by the Unicode Consortium), the returned
- * name is the same as the result of expression
+ * name is the same as the result of expression:
*
* {@code
* Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')