/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @summary Test behavior of isJavaIdentifierXX, isJavaLetter, and
 *    isJavaLetterOrDigit methods for all code points.
 * @bug 8218915
 */

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Exhaustively compares the {@code Character.isJavaIdentifierStart},
 * {@code Character.isJavaIdentifierPart}, {@code Character.isJavaLetter} and
 * {@code Character.isJavaLetterOrDigit} methods against the definitions given
 * in their specifications, for every code point (int overloads) or every
 * BMP char (char overloads).
 *
 * <p>Each test method throws a {@link RuntimeException} at the first code
 * point whose actual result differs from the expected one.
 */
public class TestIsJavaIdentifierMethods {

    // Code points that are not assigned in Unicode 6.2. The tests below
    // expect every method under test to return false for them.
    private static final List<Integer> UNASSIGNED_CODEPOINTS_IN_6_2;

    static {
        // Built in a static initializer (rather than with double-brace
        // initialization) so that no anonymous ArrayList subclass is created
        // and the published list cannot be modified afterwards.
        List<Integer> codePoints = new ArrayList<>();
        codePoints.add(0x20BB); // NORDIC MARK SIGN
        codePoints.add(0x20BC); // MANAT SIGN
        codePoints.add(0x20BD); // RUBLE SIGN
        codePoints.add(0x20BE); // LARI SIGN
        codePoints.add(0x20BF); // BITCOIN SIGN
        codePoints.add(0x32FF); // SQUARE ERA NAME NEWERA
        UNASSIGNED_CODEPOINTS_IN_6_2 = Collections.unmodifiableList(codePoints);
    }

    /**
     * Runs every test in this class; the first mismatch aborts the run with
     * a {@link RuntimeException}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        testIsJavaIdentifierPart_int();
        testIsJavaIdentifierPart_char();
        testIsJavaIdentifierStart_int();
        testIsJavaIdentifierStart_char();
        testIsJavaLetter();
        testIsJavaLetterOrDigit();
    }

    /**
     * Assertion testing for public static boolean isJavaIdentifierPart(int
     * codePoint). A character may be part of a Java identifier if any of the
     * following are true:
     * <ul>
     * <li>it is a letter</li>
     * <li>it is a currency symbol (such as <code>'$'</code>)</li>
     * <li>it is a connecting punctuation character (such as <code>'_'</code>)
     * </li>
     * <li>it is a digit</li>
     * <li>it is a numeric letter (such as a Roman numeral character)</li>
     * <li>it is a combining mark</li>
     * <li>it is a non-spacing mark</li>
     * <li><code>isIdentifierIgnorable</code> returns <code>true</code> for the
     * character</li>
     * </ul>
     * All code points from (0x0000..0x10FFFF) are tested.
     *
     * @throws RuntimeException at the first code point whose actual result
     *         differs from the expected one
     */
    public static void testIsJavaIdentifierPart_int() {
        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
            check("Character.isJavaIdentifierPart(int)", cp,
                    Character.isJavaIdentifierPart(cp),
                    expectedIdentifierPart(cp));
        }
    }

    /**
     * Assertion testing for public static boolean isJavaIdentifierPart(char
     * ch). A character may be part of a Java identifier if any of the
     * following are true:
     * <ul>
     * <li>it is a letter</li>
     * <li>it is a currency symbol (such as "$")</li>
     * <li>it is a connecting punctuation character (such as "_")</li>
     * <li>it is a digit</li>
     * <li>it is a numeric letter (such as a Roman numeral character)</li>
     * <li>it is a combining mark</li>
     * <li>it is a non-spacing mark</li>
     * <li>isIdentifierIgnorable returns true for the character</li>
     * </ul>
     * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
     *
     * @throws RuntimeException at the first char whose actual result differs
     *         from the expected one
     */
    public static void testIsJavaIdentifierPart_char() {
        // The counter is an int: a char counter would wrap around to 0 after
        // Character.MAX_VALUE and the loop would never terminate.
        for (int i = 0; i <= Character.MAX_VALUE; ++i) {
            char ch = (char) i;
            check("Character.isJavaIdentifierPart(char)", i,
                    Character.isJavaIdentifierPart(ch),
                    expectedIdentifierPart(ch));
        }
    }

    /**
     * Assertion testing for public static boolean isJavaIdentifierStart(int
     * codePoint). A character may start a Java identifier if and only if it is
     * one of the following:
     * <ul>
     * <li>it is a letter</li>
     * <li>getType(ch) returns LETTER_NUMBER</li>
     * <li>it is a currency symbol (such as "$")</li>
     * <li>it is a connecting punctuation character (such as "_")</li>
     * </ul>
     * All code points from (0x0000..0x10FFFF) are tested.
     *
     * @throws RuntimeException at the first code point whose actual result
     *         differs from the expected one
     */
    public static void testIsJavaIdentifierStart_int() {
        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
            check("Character.isJavaIdentifierStart(int)", cp,
                    Character.isJavaIdentifierStart(cp),
                    expectedIdentifierStart(cp));
        }
    }

    /**
     * Assertion testing for public static boolean isJavaIdentifierStart(char).
     * A character may start a Java identifier if and only if it is one of the
     * following:
     * <ul>
     * <li>it is a letter</li>
     * <li>getType(ch) returns LETTER_NUMBER</li>
     * <li>it is a currency symbol (such as "$")</li>
     * <li>it is a connecting punctuation character (such as "_")</li>
     * </ul>
     * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
     *
     * @throws RuntimeException at the first char whose actual result differs
     *         from the expected one
     */
    public static void testIsJavaIdentifierStart_char() {
        for (int i = 0; i <= Character.MAX_VALUE; i++) {
            char ch = (char) i;
            check("Character.isJavaIdentifierStart(char)", i,
                    Character.isJavaIdentifierStart(ch),
                    expectedIdentifierStart(ch));
        }
    }

    /**
     * Assertion testing for public static boolean isJavaLetter(char ch). A
     * character may start a Java identifier if and only if one of the
     * following is true:
     * <ul>
     * <li>isLetter(ch) returns true</li>
     * <li>getType(ch) returns LETTER_NUMBER</li>
     * <li>ch is a currency symbol (such as "$")</li>
     * <li>ch is a connecting punctuation character (such as "_")</li>
     * </ul>
     * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
     *
     * @throws RuntimeException at the first char whose actual result differs
     *         from the expected one
     */
    @SuppressWarnings("deprecation") // the deprecated method is the test subject
    public static void testIsJavaLetter() {
        for (int i = 0; i <= Character.MAX_VALUE; ++i) {
            char ch = (char) i;
            check("Character.isJavaLetter(ch)", i,
                    Character.isJavaLetter(ch),
                    expectedIdentifierStart(ch));
        }
    }

    /**
     * Assertion testing for public static boolean isJavaLetterOrDigit(char
     * ch). A character may be part of a Java identifier if and only if any
     * of the following are true:
     * <ul>
     * <li>it is a letter</li>
     * <li>it is a currency symbol (such as '$')</li>
     * <li>it is a connecting punctuation character (such as '_')</li>
     * <li>it is a digit</li>
     * <li>it is a numeric letter (such as a Roman numeral character)</li>
     * <li>it is a combining mark</li>
     * <li>it is a non-spacing mark</li>
     * <li>isIdentifierIgnorable returns true for the character</li>
     * </ul>
     * All Unicode code points in the BMP (0x0000..0xFFFF) are tested.
     *
     * @throws RuntimeException at the first char whose actual result differs
     *         from the expected one
     */
    @SuppressWarnings("deprecation") // the deprecated method is the test subject
    public static void testIsJavaLetterOrDigit() {
        for (int i = 0; i <= Character.MAX_VALUE; ++i) {
            char ch = (char) i;
            check("Character.isJavaLetterOrDigit(ch)", i,
                    Character.isJavaLetterOrDigit(ch),
                    expectedIdentifierPart(ch));
        }
    }

    /**
     * Tells whether the given code point is listed in
     * {@link #UNASSIGNED_CODEPOINTS_IN_6_2}.
     *
     * <p>The parameter is deliberately an {@code int}: the list holds
     * {@code Integer} elements, so a {@code char} argument passed straight to
     * {@code List.contains} would be boxed to a {@code Character} and never
     * match.
     */
    private static boolean isUnassignedIn6_2(int codePoint) {
        return UNASSIGNED_CODEPOINTS_IN_6_2.contains(codePoint);
    }

    /**
     * Expected "identifier part" result for a code point, computed with the
     * int overloads of the {@code Character} query methods.
     *
     * <p>Since the methods under test strictly conform to character
     * information from version 6.2 of the Unicode Standard, code points in
     * {@link #UNASSIGNED_CODEPOINTS_IN_6_2} are always expected to yield
     * {@code false}.
     */
    private static boolean expectedIdentifierPart(int cp) {
        if (isUnassignedIn6_2(cp)) {
            return false;
        }
        int type = Character.getType(cp);
        return Character.isLetter(cp)
                || type == Character.CURRENCY_SYMBOL
                || type == Character.CONNECTOR_PUNCTUATION
                || Character.isDigit(cp)
                || type == Character.LETTER_NUMBER
                || type == Character.COMBINING_SPACING_MARK
                || type == Character.NON_SPACING_MARK
                || Character.isIdentifierIgnorable(cp);
    }

    /**
     * Expected "identifier part" result for a BMP char, computed with the
     * char overloads of the {@code Character} query methods. Chars in
     * {@link #UNASSIGNED_CODEPOINTS_IN_6_2} are always expected to yield
     * {@code false}.
     */
    private static boolean expectedIdentifierPart(char ch) {
        if (isUnassignedIn6_2(ch)) {
            return false;
        }
        int type = Character.getType(ch);
        return Character.isLetter(ch)
                || type == Character.CURRENCY_SYMBOL
                || type == Character.CONNECTOR_PUNCTUATION
                || Character.isDigit(ch)
                || type == Character.LETTER_NUMBER
                || type == Character.COMBINING_SPACING_MARK
                || type == Character.NON_SPACING_MARK
                || Character.isIdentifierIgnorable(ch);
    }

    /**
     * Expected "identifier start" result for a code point, computed with the
     * int overloads of the {@code Character} query methods. Code points in
     * {@link #UNASSIGNED_CODEPOINTS_IN_6_2} are always expected to yield
     * {@code false}.
     */
    private static boolean expectedIdentifierStart(int cp) {
        if (isUnassignedIn6_2(cp)) {
            return false;
        }
        int type = Character.getType(cp);
        return Character.isLetter(cp)
                || type == Character.LETTER_NUMBER
                || type == Character.CURRENCY_SYMBOL
                || type == Character.CONNECTOR_PUNCTUATION;
    }

    /**
     * Expected "identifier start" result for a BMP char, computed with the
     * char overloads of the {@code Character} query methods. Chars in
     * {@link #UNASSIGNED_CODEPOINTS_IN_6_2} are always expected to yield
     * {@code false}.
     */
    private static boolean expectedIdentifierStart(char ch) {
        if (isUnassignedIn6_2(ch)) {
            return false;
        }
        int type = Character.getType(ch);
        return Character.isLetter(ch)
                || type == Character.LETTER_NUMBER
                || type == Character.CURRENCY_SYMBOL
                || type == Character.CONNECTOR_PUNCTUATION;
    }

    /**
     * Fails the test when the actual result differs from the expected one.
     *
     * @param method    description of the method under test, used verbatim as
     *                  the start of the failure message
     * @param codePoint code point being checked, reported in hexadecimal
     * @param actual    value returned by the method under test
     * @param expected  value derived from the method's specification
     * @throws RuntimeException if {@code actual != expected}
     */
    private static void check(String method, int codePoint,
                              boolean actual, boolean expected) {
        if (actual != expected) {
            throw new RuntimeException(
                    method + " failed for codepoint "
                    + Integer.toHexString(codePoint));
        }
    }
}