--- old/src/share/classes/java/lang/Character.java 2011-04-28 01:11:08.363069538 -0700 +++ new/src/share/classes/java/lang/Character.java 2011-04-28 01:11:07.959019313 -0700 @@ -59,14 +59,14 @@ *

The {@code char} data type (and therefore the value that a * {@code Character} object encapsulates) are based on the * original Unicode specification, which defined characters as - * fixed-width 16-bit entities. The Unicode standard has since been + * fixed-width 16-bit entities. The Unicode Standard has since been * changed to allow for characters whose representation requires more * than 16 bits. The range of legal code points is now * U+0000 to U+10FFFF, known as Unicode scalar value. * (Refer to the * definition of the U+n notation in the Unicode - * standard.) + * Standard.) * *

The set of characters from U+0000 to U+FFFF is * sometimes referred to as the Basic Multilingual Plane (BMP). @@ -5200,7 +5200,8 @@ *

* A character is lowercase if its general category type, provided * by {@code Character.getType(ch)}, is - * {@code LOWERCASE_LETTER}. + * {@code LOWERCASE_LETTER}, or it has contributory property + * Other_Lowercase as defined by the Unicode Standard. *

* The following are examples of lowercase characters: *

@@ -5235,7 +5236,8 @@
      * 

* A character is lowercase if its general category type, provided * by {@link Character#getType getType(codePoint)}, is - * {@code LOWERCASE_LETTER}. + * {@code LOWERCASE_LETTER}, or it has contributory property + * Other_Lowercase as defined by the Unicode Standard. *

* The following are examples of lowercase characters: *

@@ -5257,7 +5259,8 @@
      * @since   1.5
      */
     public static boolean isLowerCase(int codePoint) {
-        return getType(codePoint) == Character.LOWERCASE_LETTER;
+        return getType(codePoint) == Character.LOWERCASE_LETTER ||
+               CharacterData.of(codePoint).isOtherLowercase(codePoint);
     }
 
     /**
@@ -5265,6 +5268,7 @@
      * 

* A character is uppercase if its general category type, provided by - * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. + * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}, + * or it has contributory property Other_Uppercase as defined by the Unicode Standard. *

* The following are examples of uppercase characters: *

@@ -5298,7 +5302,8 @@
      * Determines if the specified character (Unicode code point) is an uppercase character.
      * 

* A character is uppercase if its general category type, provided by - * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}. + * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, + * or it has contributory property Other_Uppercase as defined by the Unicode Standard. *

* The following are examples of uppercase characters: *

@@ -5320,7 +5325,8 @@
      * @since   1.5
      */
     public static boolean isUpperCase(int codePoint) {
-        return getType(codePoint) == Character.UPPERCASE_LETTER;
+        return getType(codePoint) == Character.UPPERCASE_LETTER ||
+               CharacterData.of(codePoint).isOtherUppercase(codePoint);
     }
 
     /**
@@ -5725,6 +5731,52 @@
     }
 
     /**
+     * Determines if the specified character (Unicode code point) is alphabetic.
+     * 

+ * A character is considered to be alphabetic if its general category type, + * provided by {@link Character#getType(int) getType(codePoint)}, is any of + * the following: + *

    + *
  • UPPERCASE_LETTER + *
  • LOWERCASE_LETTER + *
  • TITLECASE_LETTER + *
  • MODIFIER_LETTER + *
  • OTHER_LETTER + *
  • LETTER_NUMBER + *
+ * or it has contributory property Other_Alphabetic as defined by the + * Unicode Standard. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return true if the character is a Unicode alphabet + * character, false otherwise. + * @since 1.7 + */ + public static boolean isAlphabetic(int codePoint) { + return (((((1 << Character.UPPERCASE_LETTER) | + (1 << Character.LOWERCASE_LETTER) | + (1 << Character.TITLECASE_LETTER) | + (1 << Character.MODIFIER_LETTER) | + (1 << Character.OTHER_LETTER) | + (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || + CharacterData.of(codePoint).isOtherAlphabetic(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) is a CJKV + * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by + * the Unicode Standard. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return true if the character is a Unicode ideograph + * character, false otherwise. + * @since 1.7 + */ + public static boolean isIdeographic(int codePoint) { + return CharacterData.of(codePoint).isIdeographic(codePoint); + } + + /** * Determines if the specified character is * permissible as the first character in a Java identifier. *

@@ -6430,7 +6482,7 @@ /** * Determines if the specified character is a Unicode space character. * A character is considered to be a space character if and only if - * it is specified to be a space character by the Unicode standard. This + * it is specified to be a space character by the Unicode Standard. This * method returns true if the character's general category type is any of * the following: *

    @@ -6458,7 +6510,7 @@ * Determines if the specified character (Unicode code point) is a * Unicode space character. A character is considered to be a * space character if and only if it is specified to be a space - * character by the Unicode standard. This method returns true if + * character by the Unicode Standard. This method returns true if * the character's general category type is any of the following: * *
      @@ -6908,7 +6960,7 @@ * @since 1.4 */ static char[] toUpperCaseCharArray(int codePoint) { - // As of Unicode 4.0, 1:M uppercasings only happen in the BMP. + // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. assert isBmpCodePoint(codePoint); return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); } @@ -6941,7 +6993,7 @@ * Note: if the specified character is not assigned a name by * the UnicodeData file (part of the Unicode Character * Database maintained by the Unicode Consortium), the returned - * name is the same as the result of expression + * name is the same as the result of expression. * *
      {@code * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')