/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @summary Test behavior of isJavaIdentifierXX, isJavaLetter, and
 *  isJavaLetterOrDigit methods for all code points.
 * @bug 8218915
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

  34 public class TestIsJavaIdentifierMethods {
  35     
  36     // List of new code points are not present in Unicode 6.2.
  37     private static final List<Integer> UNASSIGNED_CODEPOINTS_IN_6_2
  38                                     = new ArrayList<Integer>()
  39                                     {{
  40                                         add(0x20BB); // NORDIC MARK SIGN
  41                                         add(0x20BC); // MANAT SIGN
  42                                         add(0x20BD); // RUBLE SIGN
  43                                         add(0x20BE); // LARI SIGN
  44                                         add(0x20BF); // BITCOIN SIGN 
  45                                         add(0x32FF); // SQUARE ERA NAME NEWERA
  46                                     }};
  47 
  48     public static void main(String[] args) {
  49         testIsJavaIdentifierPart_int();
  50         testIsJavaIdentifierPart_char();
  51         testIsJavaIdentifierStart_int();
  52         testIsJavaIdentifierStart_char();
  53         testIsJavaLetter();
  54         testIsJavaLetterOrDigit();
  55     }
  56 
  57     /**
  58      * Assertion testing for public static boolean isJavaIdentifierPart(int
  59      * codePoint), A character may be part of a Java identifier if any of the
  60      * following are true:
  61      * <ul>
  62      * <li>it is a letter</li>
  63      * <li>it is a currency symbol (such as <code>'$'</code>)</li>
  64      * <li>it is a connecting punctuation character (such as <code>'_'</code>)
  65          * </li>
  66      * <li>it is a digit</li>
  67      * <li>it is a numeric letter (such as a Roman numeral character)</li>
  68      * <li>it is a combining mark</li>
  69      * <li>it is a non-spacing mark</li>
  70      * <li><code>isIdentifierIgnorable</code> returns <code>true</code> for the
  71      * character</li>
  72      * </ul>
  73      * All code points from (0x0000..0x10FFFF) are tested.
  74      */
  75     public static void testIsJavaIdentifierPart_int() {
  76         for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
  77             boolean expected = false;
  78 
  79             // Since Character.isJavaIdentifierPart(int) strictly conforms to
  80             // character information from version 6.2 of the Unicode Standard,
  81             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
  82             // list. If the code point is found in list 
  83             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
  84             // "expected" is considered false.
  85             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) {
  86                 byte type = (byte) Character.getType(cp);
  87                 expected = Character.isLetter(cp)
  88                         || type == Character.CURRENCY_SYMBOL
  89                         || type == Character.CONNECTOR_PUNCTUATION
  90                         || Character.isDigit(cp)
  91                         || type == Character.LETTER_NUMBER
  92                         || type == Character.COMBINING_SPACING_MARK
  93                         || type == Character.NON_SPACING_MARK
  94                         || Character.isIdentifierIgnorable(cp);
  95             }
  96 
  97             if (Character.isJavaIdentifierPart(cp) != expected) {
  98                 throw new RuntimeException(
  99                    "Character.isJavaIdentifierPart(int) failed for codepoint "
 100                                 + Integer.toHexString(cp));
 101             }
 102         }
 103     }
 104 
 105     /**
 106      * Assertion testing for public static boolean isJavaIdentifierPart(char
 107      * ch), A character may be part of a Java identifier if any of the 
 108      * following are true:
 109      * <ul>
 110      * <li>it is a letter;
 111      * <li>it is a currency symbol (such as "$");
 112      * <li>it is a connecting punctuation character (such as "_");
 113      * <li>it is a digit;
 114      * <li>it is a numeric letter (such as a Roman numeral character);
 115      * <li>it is a combining mark;
 116      * <li>it is a non-spacing mark;
 117      * <li>isIdentifierIgnorable returns true for the character.
 118      * </ul>
 119      * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
 120      */
 121     public static void testIsJavaIdentifierPart_char() {
 122         for (int i = 0; i <= Character.MAX_VALUE; ++i) {
 123             char ch = (char) i;
 124             boolean expected = false;
 125             // Since Character.isJavaIdentifierPart(char) strictly conforms to
 126             // character information from version 6.2 of the Unicode Standard,
 127             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
 128             // list. If the code point is found in list 
 129             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
 130             // "expected" is considered false.
 131             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
 132                 byte type = (byte) Character.getType(ch);
 133                 expected = Character.isLetter(ch)
 134                         || type == Character.CURRENCY_SYMBOL
 135                         || type == Character.CONNECTOR_PUNCTUATION
 136                         || Character.isDigit(ch)
 137                         || type == Character.LETTER_NUMBER
 138                         || type == Character.COMBINING_SPACING_MARK
 139                         || type == Character.NON_SPACING_MARK
 140                         || Character.isIdentifierIgnorable(ch);
 141             }
 142 
 143             if (Character.isJavaIdentifierPart((char) i) != expected) {
 144                 throw new RuntimeException(
 145                 "Character.isJavaIdentifierPart(char) failed for codepoint "
 146                                 + Integer.toHexString(i));
 147             }
 148         }
 149     }
 150 
 151     /**
 152      * Assertion testing for public static boolean isJavaIdentifierStart(int
 153      * codePoint), A character may start a Java identifier if and only if it is
 154      * one of the following:
 155      * <ul>
 156      * <li>it is a letter;</li>
 157      * <li>getType(ch) returns LETTER_NUMBER;</li>   
 158      * <li>it is a currency symbol (such as "$");</li>
 159      * <li>it is a connecting punctuation character (such as "_");</li>
 160      * </ul>
 161      * All Code points from (0x0000..0x10FFFF) are tested.
 162      */
 163     public static void testIsJavaIdentifierStart_int() {
 164         for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
 165             boolean expected = false;
 166             // Since Character.isJavaIdentifierStart(int) strictly conforms to
 167             // character information from version 6.2 of the Unicode Standard,
 168             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
 169             // list. If the code point is found in list 
 170             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
 171             // "expected" is considered false.
 172             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(cp)) {
 173                 byte type = (byte) Character.getType(cp);
 174                 expected = Character.isLetter(cp)
 175                         || type == Character.LETTER_NUMBER
 176                         || type == Character.CURRENCY_SYMBOL
 177                         || type == Character.CONNECTOR_PUNCTUATION;
 178             }
 179 
 180             if (Character.isJavaIdentifierStart(cp) != expected) {
 181                 throw new RuntimeException(
 182                         "Character.isLetter(int) failed for codepoint "
 183                                 + Integer.toHexString(cp));
 184             }
 185         }
 186     }
 187 
 188     /**
 189      * Assertion testing for public static boolean isJavaIdentifierStart(char),
 190      * A character may start a Java identifier if and only if it is
 191      * one of the following:
 192      * <ul>
 193      * <li>it is a letter;</li>
 194      * <li>getType(ch) returns LETTER_NUMBER;</li>   
 195      * <li>it is a currency symbol (such as "$");</li>
 196      * <li>it is a connecting punctuation character (such as "_");</li>
 197      * </ul>
 198      * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
 199      */
 200     public static void testIsJavaIdentifierStart_char() {
 201         for (int i = 0; i <= Character.MAX_VALUE; i++) {
 202             char ch = (char) i;
 203             boolean expected = false;
 204             // Since Character.isJavaIdentifierStart(char) strictly conforms to
 205             // character information from version 6.2 of the Unicode Standard,
 206             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
 207             // list. If the code point is found in list 
 208             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
 209             // "expected" is considered false.
 210             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
 211                 byte type = (byte) Character.getType(ch);
 212                 expected = Character.isLetter(ch)
 213                         || type == Character.LETTER_NUMBER
 214                         || type == Character.CURRENCY_SYMBOL
 215                         || type == Character.CONNECTOR_PUNCTUATION;
 216             }
 217             
 218             if (Character.isJavaIdentifierStart(ch) != expected) {
 219                 throw new RuntimeException(
 220                         "Character.isLetter(char) failed for codepoint "
 221                                 + Integer.toHexString(i));
 222             }
 223         }
 224     }
 225     
 226     /**
 227      * Assertion testing for public static boolean isJavaLetter(char ch), A
 228      * character may start a Java identifier if and only if one of the following
 229      * is true:
 230      * <ul>
 231      * <li>isLetter(ch) returns true
 232      * <li>getType(ch) returns LETTER_NUMBER
 233      * <li>ch is a currency symbol (such as "$")
 234      * <li>ch is a connecting punctuation character (such as "_").
 235      * </ul>
 236      * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
 237      */
 238     public static void testIsJavaLetter() {
 239         for (int i = 0; i <= Character.MAX_VALUE; ++i) {
 240             char ch = (char) i;
 241             boolean expected = false;
 242             // Since Character.isJavaLetter(char) strictly conforms to
 243             // character information from version 6.2 of the Unicode Standard,
 244             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
 245             // list. If the code point is found in list 
 246             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
 247             // "expected" is considered false.
 248             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
 249                 byte type = (byte) Character.getType(ch);
 250                 expected = Character.isLetter(ch)
 251                         || type == Character.LETTER_NUMBER
 252                         || type == Character.CURRENCY_SYMBOL
 253                         || type == Character.CONNECTOR_PUNCTUATION;
 254             }
 255 
 256             if (Character.isJavaLetter(ch) != expected) {
 257                 throw new RuntimeException(
 258                         "Character.isJavaLetter(ch) failed for codepoint "
 259                                 + Integer.toHexString(i));
 260             }
 261         }
 262     }
 263 
 264     /**
 265      * Assertion testing for public static boolean isJavaLetterOrDigit(char ch),
 266      * A character may be part of a Java identifier if and only if any of the
 267      * following are true:
 268      * <ul>
 269      * <li>it is a letter
 270      * <li>it is a currency symbol (such as '$')
 271      * <li>it is a connecting punctuation character (such as '_')
 272      * <li>it is a digit
 273      * <li>it is a numeric letter (such as a Roman numeral character)
 274      * <li>it is a combining mark
 275      * <li>it is a non-spacing mark
 276      * <li>isIdentifierIgnorable returns true for the character.
 277      * </ul>
 278      * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
 279      */
 280     public static void testIsJavaLetterOrDigit() {
 281         for (int i = 0; i <= Character.MAX_VALUE; ++i) {
 282             char ch = (char) i;
 283             boolean expected = false;
 284             // Since Character.isJavaLetterOrDigit(char) strictly conforms to
 285             // character information from version 6.2 of the Unicode Standard,
 286             // check if code point is in "UNASSIGNED_CODEPOINTS_IN_6_2"
 287             // list. If the code point is found in list  
 288             // "UNASSIGNED_CODEPOINTS_IN_6_2", value of variable 
 289             // "expected" is considered false.
 290             if (!UNASSIGNED_CODEPOINTS_IN_6_2.contains(i)) {
 291                 byte type = (byte) Character.getType(ch);
 292                 expected = Character.isLetter(ch)
 293                         || type == Character.CURRENCY_SYMBOL
 294                         || type == Character.CONNECTOR_PUNCTUATION
 295                         || Character.isDigit(ch)
 296                         || type == Character.LETTER_NUMBER
 297                         || type == Character.COMBINING_SPACING_MARK
 298                         || type == Character.NON_SPACING_MARK
 299                         || Character.isIdentifierIgnorable(ch);
 300             }
 301 
 302             if (Character.isJavaLetterOrDigit(ch) != expected) {
 303                 throw new RuntimeException(
 304                         "Character.isJavaLetterOrDigit(ch) failed for codepoint "
 305                                 + Integer.toHexString(i));
 306             }
 307         }
 308     }
 309 }