1 /* 2 * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 /** 29 * The CharacterData00 class encapsulates the large tables once found in 30 * java.lang.Character 31 */ 32 33 class CharacterData00 extends CharacterData { 34 /* The character properties are currently encoded into 32 bits in the following manner: 35 1 bit mirrored property 36 4 bits directionality property 37 9 bits signed offset used for converting case 38 1 bit if 1, adding the signed offset converts the character to lowercase 39 1 bit if 1, subtracting the signed offset converts the character to uppercase 40 1 bit if 1, this character has a titlecase equivalent (possibly itself) 41 3 bits 0 may not be part of an identifier 42 1 ignorable control; may continue a Unicode identifier or Java identifier 43 2 may continue a Java identifier but not a Unicode identifier (unused) 44 3 may continue a Unicode identifier or Java identifier 45 4 is a Java whitespace character 46 5 may start or continue a Java identifier; 47 may continue but not start a Unicode identifier (underscores) 48 6 may start or continue a Java identifier but not a Unicode identifier ($) 49 7 may start or continue a Unicode identifier or Java identifier 50 Thus: 51 5, 6, 7 may start a Java identifier 52 1, 2, 3, 5, 6, 7 may continue a Java identifier 53 7 may start a Unicode identifier 54 1, 3, 5, 7 may continue a Unicode identifier 55 1 is ignorable within an identifier 56 4 is Java whitespace 57 2 bits 0 this character has no numeric property 58 1 adding the digit offset to the character code and then 59 masking with 0x1F will produce the desired numeric value 60 2 this character has a "strange" numeric value 61 3 a Java supradecimal digit: adding the digit offset to the 62 character code, then masking with 0x1F, then adding 10 63 will produce the desired numeric value 64 5 bits digit offset 65 5 bits character type 66 67 The encoding of character properties is subject to change at any time. 68 */ 69 70 int getProperties(int ch) { 71 char offset = (char)ch; 72 int props = $$Lookup(offset); 73 return props; 74 } 75 76 int getPropertiesEx(int ch) { 77 char offset = (char)ch; 78 int props = $$LookupEx(offset); 79 return props; 80 } 81 82 int getType(int ch) { 83 int props = getProperties(ch); 84 return (props & $$maskType); 85 } 86 87 boolean isOtherLowercase(int ch) { 88 int props = getPropertiesEx(ch); 89 return (props & $$maskOtherLowercase) != 0; 90 } 91 92 boolean isOtherUppercase(int ch) { 93 int props = getPropertiesEx(ch); 94 return (props & $$maskOtherUppercase) != 0; 95 } 96 97 boolean isOtherAlphabetic(int ch) { 98 int props = getPropertiesEx(ch); 99 return (props & $$maskOtherAlphabetic) != 0; 100 } 101 102 boolean isIdeographic(int ch) { 103 int props = getPropertiesEx(ch); 104 return (props & $$maskIdeographic) != 0; 105 } 106 107 boolean isJavaIdentifierStart(int ch) { 108 // isJavaIdentifierStart strictly conforms to code points assigned 109 // in Unicode 6.2. Since code points {32FF} and {20BB..20BF} are not 110 // from Unicode 6.2, return false. 111 if(ch == 0x32FF || (ch>= 0x20BB && ch<= 0x20BF)) 112 return false; 113 int props = getProperties(ch); 114 return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); 115 } 116 117 boolean isJavaIdentifierPart(int ch) { 118 // isJavaIdentifierPart strictly conforms to code points assigned 119 // in Unicode 6.2. Since code points {32FF} and {20BB..20BF} are not 120 // from Unicode 6.2, return false. 121 if(ch == 0x32FF || (ch>= 0x20BB && ch<= 0x20BF)) 122 return false; 123 int props = getProperties(ch); 124 return ((props & $$nonzeroJavaPart) != 0); 125 } 126 127 boolean isUnicodeIdentifierStart(int ch) { 128 int props = getProperties(ch); 129 return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart); 130 } 131 132 boolean isUnicodeIdentifierPart(int ch) { 133 int props = getProperties(ch); 134 return ((props & $$maskUnicodePart) != 0); 135 } 136 137 boolean isIdentifierIgnorable(int ch) { 138 int props = getProperties(ch); 139 return ((props & $$maskIdentifierInfo) == $$valueIgnorable); 140 } 141 142 int toLowerCase(int ch) { 143 int mapChar = ch; 144 int val = getProperties(ch); 145 146 if ((val & $$maskLowerCase) != 0) { 147 if ((val & $$maskCaseOffset) == $$maskCaseOffset) { 148 switch(ch) { 149 // map the offset overflow chars 150 case 0x0130 : mapChar = 0x0069; break; 151 case 0x2126 : mapChar = 0x03C9; break; 152 case 0x212A : mapChar = 0x006B; break; 153 case 0x212B : mapChar = 0x00E5; break; 154 // map the titlecase chars with both a 1:M uppercase map 155 // and a lowercase map 156 case 0x1F88 : mapChar = 0x1F80; break; 157 case 0x1F89 : mapChar = 0x1F81; break; 158 case 0x1F8A : mapChar = 0x1F82; break; 159 case 0x1F8B : mapChar = 0x1F83; break; 160 case 0x1F8C : mapChar = 0x1F84; break; 161 case 0x1F8D : mapChar = 0x1F85; break; 162 case 0x1F8E : mapChar = 0x1F86; break; 163 case 0x1F8F : mapChar = 0x1F87; break; 164 case 0x1F98 : mapChar = 0x1F90; break; 165 case 0x1F99 : mapChar = 0x1F91; break; 166 case 0x1F9A : mapChar = 0x1F92; break; 167 case 0x1F9B : mapChar = 0x1F93; break; 168 case 0x1F9C : mapChar = 0x1F94; break; 169 case 0x1F9D : mapChar = 0x1F95; break; 170 case 0x1F9E : mapChar = 0x1F96; break; 171 case 0x1F9F : mapChar = 0x1F97; break; 172 case 0x1FA8 : mapChar = 0x1FA0; break; 173 case 0x1FA9 : mapChar = 0x1FA1; break; 174 case 0x1FAA : mapChar = 0x1FA2; break; 175 case 0x1FAB : mapChar = 0x1FA3; break; 176 case 0x1FAC : mapChar = 0x1FA4; break; 177 case 0x1FAD : mapChar = 0x1FA5; break; 178 case 0x1FAE : mapChar = 0x1FA6; break; 179 case 0x1FAF : mapChar = 0x1FA7; break; 180 case 0x1FBC : mapChar = 0x1FB3; break; 181 case 0x1FCC : mapChar = 0x1FC3; break; 182 case 0x1FFC : mapChar = 0x1FF3; break; 183 184 case 0x023A : mapChar = 0x2C65; break; 185 case 0x023E : mapChar = 0x2C66; break; 186 case 0x10A0 : mapChar = 0x2D00; break; 187 case 0x10A1 : mapChar = 0x2D01; break; 188 case 0x10A2 : mapChar = 0x2D02; break; 189 case 0x10A3 : mapChar = 0x2D03; break; 190 case 0x10A4 : mapChar = 0x2D04; break; 191 case 0x10A5 : mapChar = 0x2D05; break; 192 case 0x10A6 : mapChar = 0x2D06; break; 193 case 0x10A7 : mapChar = 0x2D07; break; 194 case 0x10A8 : mapChar = 0x2D08; break; 195 case 0x10A9 : mapChar = 0x2D09; break; 196 case 0x10AA : mapChar = 0x2D0A; break; 197 case 0x10AB : mapChar = 0x2D0B; break; 198 case 0x10AC : mapChar = 0x2D0C; break; 199 case 0x10AD : mapChar = 0x2D0D; break; 200 case 0x10AE : mapChar = 0x2D0E; break; 201 case 0x10AF : mapChar = 0x2D0F; break; 202 case 0x10B0 : mapChar = 0x2D10; break; 203 case 0x10B1 : mapChar = 0x2D11; break; 204 case 0x10B2 : mapChar = 0x2D12; break; 205 case 0x10B3 : mapChar = 0x2D13; break; 206 case 0x10B4 : mapChar = 0x2D14; break; 207 case 0x10B5 : mapChar = 0x2D15; break; 208 case 0x10B6 : mapChar = 0x2D16; break; 209 case 0x10B7 : mapChar = 0x2D17; break; 210 case 0x10B8 : mapChar = 0x2D18; break; 211 case 0x10B9 : mapChar = 0x2D19; break; 212 case 0x10BA : mapChar = 0x2D1A; break; 213 case 0x10BB : mapChar = 0x2D1B; break; 214 case 0x10BC : mapChar = 0x2D1C; break; 215 case 0x10BD : mapChar = 0x2D1D; break; 216 case 0x10BE : mapChar = 0x2D1E; break; 217 case 0x10BF : mapChar = 0x2D1F; break; 218 case 0x10C0 : mapChar = 0x2D20; break; 219 case 0x10C1 : mapChar = 0x2D21; break; 220 case 0x10C2 : mapChar = 0x2D22; break; 221 case 0x10C3 : mapChar = 0x2D23; break; 222 case 0x10C4 : mapChar = 0x2D24; break; 223 case 0x10C5 : mapChar = 0x2D25; break; 224 case 0x10C7 : mapChar = 0x2D27; break; 225 case 0x10CD : mapChar = 0x2D2D; break; 226 case 0x1E9E : mapChar = 0x00DF; break; 227 case 0x2C62 : mapChar = 0x026B; break; 228 case 0x2C63 : mapChar = 0x1D7D; break; 229 case 0x2C64 : mapChar = 0x027D; break; 230 case 0x2C6D : mapChar = 0x0251; break; 231 case 0x2C6E : mapChar = 0x0271; break; 232 case 0x2C6F : mapChar = 0x0250; break; 233 case 0x2C70 : mapChar = 0x0252; break; 234 case 0x2C7E : mapChar = 0x023F; break; 235 case 0x2C7F : mapChar = 0x0240; break; 236 case 0xA77D : mapChar = 0x1D79; break; 237 case 0xA78D : mapChar = 0x0265; break; 238 case 0xA7AA : mapChar = 0x0266; break; 239 // default mapChar is already set, so no 240 // need to redo it here. 241 // default : mapChar = ch; 242 } 243 } 244 else { 245 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 246 mapChar = ch + offset; 247 } 248 } 249 return mapChar; 250 } 251 252 int toUpperCase(int ch) { 253 int mapChar = ch; 254 int val = getProperties(ch); 255 256 if ((val & $$maskUpperCase) != 0) { 257 if ((val & $$maskCaseOffset) == $$maskCaseOffset) { 258 switch(ch) { 259 // map chars with overflow offsets 260 case 0x00B5 : mapChar = 0x039C; break; 261 case 0x017F : mapChar = 0x0053; break; 262 case 0x1FBE : mapChar = 0x0399; break; 263 // map char that have both a 1:1 and 1:M map 264 case 0x1F80 : mapChar = 0x1F88; break; 265 case 0x1F81 : mapChar = 0x1F89; break; 266 case 0x1F82 : mapChar = 0x1F8A; break; 267 case 0x1F83 : mapChar = 0x1F8B; break; 268 case 0x1F84 : mapChar = 0x1F8C; break; 269 case 0x1F85 : mapChar = 0x1F8D; break; 270 case 0x1F86 : mapChar = 0x1F8E; break; 271 case 0x1F87 : mapChar = 0x1F8F; break; 272 case 0x1F90 : mapChar = 0x1F98; break; 273 case 0x1F91 : mapChar = 0x1F99; break; 274 case 0x1F92 : mapChar = 0x1F9A; break; 275 case 0x1F93 : mapChar = 0x1F9B; break; 276 case 0x1F94 : mapChar = 0x1F9C; break; 277 case 0x1F95 : mapChar = 0x1F9D; break; 278 case 0x1F96 : mapChar = 0x1F9E; break; 279 case 0x1F97 : mapChar = 0x1F9F; break; 280 case 0x1FA0 : mapChar = 0x1FA8; break; 281 case 0x1FA1 : mapChar = 0x1FA9; break; 282 case 0x1FA2 : mapChar = 0x1FAA; break; 283 case 0x1FA3 : mapChar = 0x1FAB; break; 284 case 0x1FA4 : mapChar = 0x1FAC; break; 285 case 0x1FA5 : mapChar = 0x1FAD; break; 286 case 0x1FA6 : mapChar = 0x1FAE; break; 287 case 0x1FA7 : mapChar = 0x1FAF; break; 288 case 0x1FB3 : mapChar = 0x1FBC; break; 289 case 0x1FC3 : mapChar = 0x1FCC; break; 290 case 0x1FF3 : mapChar = 0x1FFC; break; 291 292 case 0x023F : mapChar = 0x2C7E; break; 293 case 0x0240 : mapChar = 0x2C7F; break; 294 case 0x0250 : mapChar = 0x2C6F; break; 295 case 0x0251 : mapChar = 0x2C6D; break; 296 case 0x0252 : mapChar = 0x2C70; break; 297 case 0x0265 : mapChar = 0xA78D; break; 298 case 0x0266 : mapChar = 0xA7AA; break; 299 case 0x026B : mapChar = 0x2C62; break; 300 case 0x0271 : mapChar = 0x2C6E; break; 301 case 0x027D : mapChar = 0x2C64; break; 302 case 0x1D79 : mapChar = 0xA77D; break; 303 case 0x1D7D : mapChar = 0x2C63; break; 304 case 0x2C65 : mapChar = 0x023A; break; 305 case 0x2C66 : mapChar = 0x023E; break; 306 case 0x2D00 : mapChar = 0x10A0; break; 307 case 0x2D01 : mapChar = 0x10A1; break; 308 case 0x2D02 : mapChar = 0x10A2; break; 309 case 0x2D03 : mapChar = 0x10A3; break; 310 case 0x2D04 : mapChar = 0x10A4; break; 311 case 0x2D05 : mapChar = 0x10A5; break; 312 case 0x2D06 : mapChar = 0x10A6; break; 313 case 0x2D07 : mapChar = 0x10A7; break; 314 case 0x2D08 : mapChar = 0x10A8; break; 315 case 0x2D09 : mapChar = 0x10A9; break; 316 case 0x2D0A : mapChar = 0x10AA; break; 317 case 0x2D0B : mapChar = 0x10AB; break; 318 case 0x2D0C : mapChar = 0x10AC; break; 319 case 0x2D0D : mapChar = 0x10AD; break; 320 case 0x2D0E : mapChar = 0x10AE; break; 321 case 0x2D0F : mapChar = 0x10AF; break; 322 case 0x2D10 : mapChar = 0x10B0; break; 323 case 0x2D11 : mapChar = 0x10B1; break; 324 case 0x2D12 : mapChar = 0x10B2; break; 325 case 0x2D13 : mapChar = 0x10B3; break; 326 case 0x2D14 : mapChar = 0x10B4; break; 327 case 0x2D15 : mapChar = 0x10B5; break; 328 case 0x2D16 : mapChar = 0x10B6; break; 329 case 0x2D17 : mapChar = 0x10B7; break; 330 case 0x2D18 : mapChar = 0x10B8; break; 331 case 0x2D19 : mapChar = 0x10B9; break; 332 case 0x2D1A : mapChar = 0x10BA; break; 333 case 0x2D1B : mapChar = 0x10BB; break; 334 case 0x2D1C : mapChar = 0x10BC; break; 335 case 0x2D1D : mapChar = 0x10BD; break; 336 case 0x2D1E : mapChar = 0x10BE; break; 337 case 0x2D1F : mapChar = 0x10BF; break; 338 case 0x2D20 : mapChar = 0x10C0; break; 339 case 0x2D21 : mapChar = 0x10C1; break; 340 case 0x2D22 : mapChar = 0x10C2; break; 341 case 0x2D23 : mapChar = 0x10C3; break; 342 case 0x2D24 : mapChar = 0x10C4; break; 343 case 0x2D25 : mapChar = 0x10C5; break; 344 case 0x2D27 : mapChar = 0x10C7; break; 345 case 0x2D2D : mapChar = 0x10CD; break; 346 // ch must have a 1:M case mapping, but we 347 // can't handle it here. Return ch. 348 // since mapChar is already set, no need 349 // to redo it here. 350 //default : mapChar = ch; 351 } 352 } 353 else { 354 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 355 mapChar = ch - offset; 356 } 357 } 358 return mapChar; 359 } 360 361 int toTitleCase(int ch) { 362 int mapChar = ch; 363 int val = getProperties(ch); 364 365 if ((val & $$maskTitleCase) != 0) { 366 // There is a titlecase equivalent. Perform further checks: 367 if ((val & $$maskUpperCase) == 0) { 368 // The character does not have an uppercase equivalent, so it must 369 // already be uppercase; so add 1 to get the titlecase form. 370 mapChar = ch + 1; 371 } 372 else if ((val & $$maskLowerCase) == 0) { 373 // The character does not have a lowercase equivalent, so it must 374 // already be lowercase; so subtract 1 to get the titlecase form. 375 mapChar = ch - 1; 376 } 377 // else { 378 // The character has both an uppercase equivalent and a lowercase 379 // equivalent, so it must itself be a titlecase form; return it. 380 // return ch; 381 //} 382 } 383 else if ((val & $$maskUpperCase) != 0) { 384 // This character has no titlecase equivalent but it does have an 385 // uppercase equivalent, so use that (subtract the signed case offset). 386 mapChar = toUpperCase(ch); 387 } 388 return mapChar; 389 } 390 391 int digit(int ch, int radix) { 392 int value = -1; 393 if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) { 394 int val = getProperties(ch); 395 int kind = val & $$maskType; 396 if (kind == Character.DECIMAL_DIGIT_NUMBER) { 397 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; 398 } 399 else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) { 400 // Java supradecimal digit 401 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; 402 } 403 } 404 return (value < radix) ? value : -1; 405 } 406 407 int getNumericValue(int ch) { 408 int val = getProperties(ch); 409 int retval = -1; 410 411 switch (val & $$maskNumericType) { 412 default: // cannot occur 413 case ($$valueNotNumeric): // not numeric 414 retval = -1; 415 break; 416 case ($$valueDigit): // simple numeric 417 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; 418 break; 419 case ($$valueStrangeNumeric) : // "strange" numeric 420 switch (ch) { 421 case 0x0BF1: retval = 100; break; // TAMIL NUMBER ONE HUNDRED 422 case 0x0BF2: retval = 1000; break; // TAMIL NUMBER ONE THOUSAND 423 case 0x1375: retval = 40; break; // ETHIOPIC NUMBER FORTY 424 case 0x1376: retval = 50; break; // ETHIOPIC NUMBER FIFTY 425 case 0x1377: retval = 60; break; // ETHIOPIC NUMBER SIXTY 426 case 0x1378: retval = 70; break; // ETHIOPIC NUMBER SEVENTY 427 case 0x1379: retval = 80; break; // ETHIOPIC NUMBER EIGHTY 428 case 0x137A: retval = 90; break; // ETHIOPIC NUMBER NINETY 429 case 0x137B: retval = 100; break; // ETHIOPIC NUMBER HUNDRED 430 case 0x137C: retval = 10000; break; // ETHIOPIC NUMBER TEN THOUSAND 431 case 0x215F: retval = 1; break; // FRACTION NUMERATOR ONE 432 case 0x216C: retval = 50; break; // ROMAN NUMERAL FIFTY 433 case 0x216D: retval = 100; break; // ROMAN NUMERAL ONE HUNDRED 434 case 0x216E: retval = 500; break; // ROMAN NUMERAL FIVE HUNDRED 435 case 0x216F: retval = 1000; break; // ROMAN NUMERAL ONE THOUSAND 436 case 0x217C: retval = 50; break; // SMALL ROMAN NUMERAL FIFTY 437 case 0x217D: retval = 100; break; // SMALL ROMAN NUMERAL ONE HUNDRED 438 case 0x217E: retval = 500; break; // SMALL ROMAN NUMERAL FIVE HUNDRED 439 case 0x217F: retval = 1000; break; // SMALL ROMAN NUMERAL ONE THOUSAND 440 case 0x2180: retval = 1000; break; // ROMAN NUMERAL ONE THOUSAND C D 441 case 0x2181: retval = 5000; break; // ROMAN NUMERAL FIVE THOUSAND 442 case 0x2182: retval = 10000; break; // ROMAN NUMERAL TEN THOUSAND 443 444 case 0x324B: retval = 40; break; 445 case 0x324C: retval = 50; break; 446 case 0x324D: retval = 60; break; 447 case 0x324E: retval = 70; break; 448 case 0x324F: retval = 80; break; 449 case 0x325C: retval = 32; break; 450 451 case 0x325D: retval = 33; break; // CIRCLED NUMBER THIRTY THREE 452 case 0x325E: retval = 34; break; // CIRCLED NUMBER THIRTY FOUR 453 case 0x325F: retval = 35; break; // CIRCLED NUMBER THIRTY FIVE 454 case 0x32B1: retval = 36; break; // CIRCLED NUMBER THIRTY SIX 455 case 0x32B2: retval = 37; break; // CIRCLED NUMBER THIRTY SEVEN 456 case 0x32B3: retval = 38; break; // CIRCLED NUMBER THIRTY EIGHT 457 case 0x32B4: retval = 39; break; // CIRCLED NUMBER THIRTY NINE 458 case 0x32B5: retval = 40; break; // CIRCLED NUMBER FORTY 459 case 0x32B6: retval = 41; break; // CIRCLED NUMBER FORTY ONE 460 case 0x32B7: retval = 42; break; // CIRCLED NUMBER FORTY TWO 461 case 0x32B8: retval = 43; break; // CIRCLED NUMBER FORTY THREE 462 case 0x32B9: retval = 44; break; // CIRCLED NUMBER FORTY FOUR 463 case 0x32BA: retval = 45; break; // CIRCLED NUMBER FORTY FIVE 464 case 0x32BB: retval = 46; break; // CIRCLED NUMBER FORTY SIX 465 case 0x32BC: retval = 47; break; // CIRCLED NUMBER FORTY SEVEN 466 case 0x32BD: retval = 48; break; // CIRCLED NUMBER FORTY EIGHT 467 case 0x32BE: retval = 49; break; // CIRCLED NUMBER FORTY NINE 468 case 0x32BF: retval = 50; break; // CIRCLED NUMBER FIFTY 469 470 case 0x0D71: retval = 100; break; // MALAYALAM NUMBER ONE HUNDRED 471 case 0x0D72: retval = 1000; break; // MALAYALAM NUMBER ONE THOUSAND 472 case 0x2186: retval = 50; break; // ROMAN NUMERAL FIFTY EARLY FORM 473 case 0x2187: retval = 50000; break; // ROMAN NUMERAL FIFTY THOUSAND 474 case 0x2188: retval = 100000; break; // ROMAN NUMERAL ONE HUNDRED THOUSAND 475 476 default: retval = -2; break; 477 } 478 break; 479 case ($$valueJavaSupradecimal): // Java supradecimal 480 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; 481 break; 482 } 483 return retval; 484 } 485 486 boolean isWhitespace(int ch) { 487 int props = getProperties(ch); 488 return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace); 489 } 490 491 byte getDirectionality(int ch) { 492 int val = getProperties(ch); 493 byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi); 494 if (directionality == 0xF ) { 495 switch(ch) { 496 case 0x202A : 497 // This is the only char with LRE 498 directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING; 499 break; 500 case 0x202B : 501 // This is the only char with RLE 502 directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING; 503 break; 504 case 0x202C : 505 // This is the only char with PDF 506 directionality = Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT; 507 break; 508 case 0x202D : 509 // This is the only char with LRO 510 directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE; 511 break; 512 case 0x202E : 513 // This is the only char with RLO 514 directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE; 515 break; 516 default : 517 directionality = Character.DIRECTIONALITY_UNDEFINED; 518 break; 519 } 520 } 521 return directionality; 522 } 523 524 boolean isMirrored(int ch) { 525 int props = getProperties(ch); 526 return ((props & $$maskMirrored) != 0); 527 } 528 529 int toUpperCaseEx(int ch) { 530 int mapChar = ch; 531 int val = getProperties(ch); 532 533 if ((val & $$maskUpperCase) != 0) { 534 if ((val & $$maskCaseOffset) != $$maskCaseOffset) { 535 int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); 536 mapChar = ch - offset; 537 } 538 else { 539 switch(ch) { 540 // map overflow characters 541 case 0x00B5 : mapChar = 0x039C; break; 542 case 0x017F : mapChar = 0x0053; break; 543 case 0x1FBE : mapChar = 0x0399; break; 544 545 case 0x023F : mapChar = 0x2C7E; break; 546 case 0x0240 : mapChar = 0x2C7F; break; 547 case 0x0250 : mapChar = 0x2C6F; break; 548 case 0x0251 : mapChar = 0x2C6D; break; 549 case 0x0252 : mapChar = 0x2C70; break; 550 case 0x0265 : mapChar = 0xA78D; break; 551 case 0x0266 : mapChar = 0xA7AA; break; 552 case 0x026B : mapChar = 0x2C62; break; 553 case 0x0271 : mapChar = 0x2C6E; break; 554 case 0x027D : mapChar = 0x2C64; break; 555 case 0x1D79 : mapChar = 0xA77D; break; 556 case 0x1D7D : mapChar = 0x2C63; break; 557 case 0x2C65 : mapChar = 0x023A; break; 558 case 0x2C66 : mapChar = 0x023E; break; 559 case 0x2D00 : mapChar = 0x10A0; break; 560 case 0x2D01 : mapChar = 0x10A1; break; 561 case 0x2D02 : mapChar = 0x10A2; break; 562 case 0x2D03 : mapChar = 0x10A3; break; 563 case 0x2D04 : mapChar = 0x10A4; break; 564 case 0x2D05 : mapChar = 0x10A5; break; 565 case 0x2D06 : mapChar = 0x10A6; break; 566 case 0x2D07 : mapChar = 0x10A7; break; 567 case 0x2D08 : mapChar = 0x10A8; break; 568 case 0x2D09 : mapChar = 0x10A9; break; 569 case 0x2D0A : mapChar = 0x10AA; break; 570 case 0x2D0B : mapChar = 0x10AB; break; 571 case 0x2D0C : mapChar = 0x10AC; break; 572 case 0x2D0D : mapChar = 0x10AD; break; 573 case 0x2D0E : mapChar = 0x10AE; break; 574 case 0x2D0F : mapChar = 0x10AF; break; 575 case 0x2D10 : mapChar = 0x10B0; break; 576 case 0x2D11 : mapChar = 0x10B1; break; 577 case 0x2D12 : mapChar = 0x10B2; break; 578 case 0x2D13 : mapChar = 0x10B3; break; 579 case 0x2D14 : mapChar = 0x10B4; break; 580 case 0x2D15 : mapChar = 0x10B5; break; 581 case 0x2D16 : mapChar = 0x10B6; break; 582 case 0x2D17 : mapChar = 0x10B7; break; 583 case 0x2D18 : mapChar = 0x10B8; break; 584 case 0x2D19 : mapChar = 0x10B9; break; 585 case 0x2D1A : mapChar = 0x10BA; break; 586 case 0x2D1B : mapChar = 0x10BB; break; 587 case 0x2D1C : mapChar = 0x10BC; break; 588 case 0x2D1D : mapChar = 0x10BD; break; 589 case 0x2D1E : mapChar = 0x10BE; break; 590 case 0x2D1F : mapChar = 0x10BF; break; 591 case 0x2D20 : mapChar = 0x10C0; break; 592 case 0x2D21 : mapChar = 0x10C1; break; 593 case 0x2D22 : mapChar = 0x10C2; break; 594 case 0x2D23 : mapChar = 0x10C3; break; 595 case 0x2D24 : mapChar = 0x10C4; break; 596 case 0x2D25 : mapChar = 0x10C5; break; 597 case 0x2D27 : mapChar = 0x10C7; break; 598 case 0x2D2D : mapChar = 0x10CD; break; 599 default : mapChar = Character.ERROR; break; 600 } 601 } 602 } 603 return mapChar; 604 } 605 606 char[] toUpperCaseCharArray(int ch) { 607 char[] upperMap = {(char)ch}; 608 int location = findInCharMap(ch); 609 if (location != -1) { 610 upperMap = charMap[location][1]; 611 } 612 return upperMap; 613 } 614 615 616 /** 617 * Finds the character in the uppercase mapping table. 618 * 619 * @param ch the <code>char</code> to search 620 * @return the index location ch in the table or -1 if not found 621 * @since 1.4 622 */ 623 int findInCharMap(int ch) { 624 if (charMap == null || charMap.length == 0) { 625 return -1; 626 } 627 int top, bottom, current; 628 bottom = 0; 629 top = charMap.length; 630 current = top/2; 631 // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0] 632 while (top - bottom > 1) { 633 if (ch >= charMap[current][0][0]) { 634 bottom = current; 635 } else { 636 top = current; 637 } 638 current = (top + bottom) / 2; 639 } 640 if (ch == charMap[current][0][0]) return current; 641 else return -1; 642 } 643 644 static final CharacterData00 instance = new CharacterData00(); 645 private CharacterData00() {}; 646 647 $$Tables 648 649 static { 650 $$Initializers 651 } 652 }