/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.lang;

/**
 * The CharacterData00 class encapsulates the large tables once found in
 * java.lang.Character.
 *
 * <p>Every query method takes an {@code int} code point, fetches a packed
 * 32-bit property word for it via {@link #getProperties(int)} (or
 * {@link #getPropertiesEx(int)}) and decodes the requested field with a mask
 * and/or shift.
 *
 * <p>NOTE(review): identifiers that start with two dollar signs (the lookup
 * calls, masks, shifts, value constants, and the bare table/initializer
 * markers near the end of the class) are not declared anywhere in this file.
 * They look like placeholders that a source generator substitutes before
 * compilation -- confirm against the build tooling before editing them.
 */

class CharacterData00 extends CharacterData {
    /* The character properties are currently encoded into 32 bits in the following manner:
        1 bit   mirrored property
        4 bits  directionality property
        9 bits  signed offset used for converting case
        1 bit   if 1, adding the signed offset converts the character to lowercase
        1 bit   if 1, subtracting the signed offset converts the character to uppercase
        1 bit   if 1, this character has a titlecase equivalent (possibly itself)
        3 bits  0  may not be part of an identifier
                1  ignorable control; may continue a Unicode identifier or Java identifier
                2  may continue a Java identifier but not a Unicode identifier (unused)
                3  may continue a Unicode identifier or Java identifier
                4  is a Java whitespace character
                5  may start or continue a Java identifier;
                   may continue but not start a Unicode identifier (underscores)
                6  may start or continue a Java identifier but not a Unicode identifier ($)
                7  may start or continue a Unicode identifier or Java identifier
                Thus:
                   5, 6, 7 may start a Java identifier
                   1, 2, 3, 5, 6, 7 may continue a Java identifier
                   7 may start a Unicode identifier
                   1, 3, 5, 7 may continue a Unicode identifier
                   1 is ignorable within an identifier
                   4 is Java whitespace
        2 bits  0  this character has no numeric property
                1  adding the digit offset to the character code and then
                   masking with 0x1F will produce the desired numeric value
                2  this character has a "strange" numeric value
                3  a Java supradecimal digit: adding the digit offset to the
                   character code, then masking with 0x1F, then adding 10
                   will produce the desired numeric value
        5 bits  digit offset
        5 bits  character type

        The encoding of character properties is subject to change at any time.
     */

    /**
     * Returns the packed property word for the given character.
     *
     * <p>The argument is narrowed to {@code char} before the lookup, so only
     * its low 16 bits select the table entry.
     *
     * @param ch the character (code point) to look up
     * @return the packed properties, laid out as described in the comment at
     *         the top of this class
     */
    int getProperties(int ch) {
        char offset = (char)ch;
        int props = $$Lookup(offset);
        return props;
    }

    /**
     * Returns the extended property word for the given character, obtained
     * from the second ("Ex") lookup.
     *
     * <p>As in {@link #getProperties(int)}, the argument is narrowed to
     * {@code char} first. The bits of this word are the ones tested by
     * {@link #isOtherLowercase(int)}, {@link #isOtherUppercase(int)},
     * {@link #isOtherAlphabetic(int)} and {@link #isIdeographic(int)}.
     *
     * @param ch the character (code point) to look up
     * @return the extended properties word
     */
    int getPropertiesEx(int ch) {
        char offset = (char)ch;
        int props = $$LookupEx(offset);
        return props;
    }

    /**
     * Returns the character-type field of the property word.
     *
     * @param ch the character (code point) to test
     * @return the property word masked down to its character type bits
     */
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
    }

    /**
     * Tests the "other lowercase" bit of the extended property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the bit is set
     */
    boolean isOtherLowercase(int ch) {
        int props = getPropertiesEx(ch);
        return (props & $$maskOtherLowercase) != 0;
    }

    /**
     * Tests the "other uppercase" bit of the extended property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the bit is set
     */
    boolean isOtherUppercase(int ch) {
        int props = getPropertiesEx(ch);
        return (props & $$maskOtherUppercase) != 0;
    }

    /**
     * Tests the "other alphabetic" bit of the extended property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the bit is set
     */
    boolean isOtherAlphabetic(int ch) {
        int props = getPropertiesEx(ch);
        return (props & $$maskOtherAlphabetic) != 0;
    }

    /**
     * Tests the "ideographic" bit of the extended property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the bit is set
     */
    boolean isIdeographic(int ch) {
        int props = getPropertiesEx(ch);
        return (props & $$maskIdeographic) != 0;
    }

    /**
     * Tests whether the character may start a Java identifier.
     *
     * <p>Implemented as a threshold test on the identifier-info field: every
     * value at or above the "low Java start" constant qualifies (values 5, 6
     * and 7 according to the layout comment at the top of this class).
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the identifier-info field is at or above the
     *         threshold
     */
    boolean isJavaIdentifierStart(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
    }

    /**
     * Tests whether the character may be part of a Java identifier.
     *
     * <p>True when any bit selected by the "nonzero Java part" mask is set in
     * the property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if any of the masked bits is set
     */
    boolean isJavaIdentifierPart(int ch) {
        int props = getProperties(ch);
        return ((props & $$nonzeroJavaPart) != 0);
    }

    /**
     * Tests whether the character may start a Unicode identifier.
     *
     * <p>Requires the identifier-info field to equal the single "Unicode
     * start" value (7 according to the layout comment at the top of this
     * class).
     *
     * @param ch the character (code point) to test
     * @return {@code true} on an exact match of the identifier-info field
     */
    boolean isUnicodeIdentifierStart(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
    }

    /**
     * Tests whether the character may be part of a Unicode identifier.
     *
     * <p>True when any bit selected by the "Unicode part" mask is set in the
     * property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if any of the masked bits is set
     */
    boolean isUnicodeIdentifierPart(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskUnicodePart) != 0);
    }

    /**
     * Tests whether the character is ignorable within an identifier.
     *
     * <p>Requires the identifier-info field to equal the "ignorable" value
     * (1 according to the layout comment at the top of this class).
     *
     * @param ch the character (code point) to test
     * @return {@code true} on an exact match of the identifier-info field
     */
    boolean isIdentifierIgnorable(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
    }

    /**
     * Maps the character to its lowercase equivalent.
     *
     * <p>If the "has lowercase" bit is clear, the argument is returned
     * unchanged. Otherwise the signed case offset stored in the property word
     * is added to the character. When that offset field has every bit set it
     * does not hold a real offset; it acts as an escape, and the result is
     * taken from the explicit table below. A character that reaches the table
     * without being listed is returned unchanged.
     *
     * @param ch the character (code point) to convert
     * @return the lowercase mapping, or {@code ch} itself if there is none
     */
    int toLowerCase(int ch) {
        int mapChar = ch;
        int val = getProperties(ch);

        if ((val & $$maskLowerCase) != 0) {
            // An all-ones offset field flags a mapping that is handled by the
            // explicit table instead of by offset arithmetic.
            if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
                switch(ch) {
                    // map the offset overflow chars
                case 0x0130 : mapChar = 0x0069; break;
                case 0x2126 : mapChar = 0x03C9; break;
                case 0x212A : mapChar = 0x006B; break;
                case 0x212B : mapChar = 0x00E5; break;
                    // map the titlecase chars with both a 1:M uppercase map
                    // and a lowercase map
                case 0x1F88 : mapChar = 0x1F80; break;
                case 0x1F89 : mapChar = 0x1F81; break;
                case 0x1F8A : mapChar = 0x1F82; break;
                case 0x1F8B : mapChar = 0x1F83; break;
                case 0x1F8C : mapChar = 0x1F84; break;
                case 0x1F8D : mapChar = 0x1F85; break;
                case 0x1F8E : mapChar = 0x1F86; break;
                case 0x1F8F : mapChar = 0x1F87; break;
                case 0x1F98 : mapChar = 0x1F90; break;
                case 0x1F99 : mapChar = 0x1F91; break;
                case 0x1F9A : mapChar = 0x1F92; break;
                case 0x1F9B : mapChar = 0x1F93; break;
                case 0x1F9C : mapChar = 0x1F94; break;
                case 0x1F9D : mapChar = 0x1F95; break;
                case 0x1F9E : mapChar = 0x1F96; break;
                case 0x1F9F : mapChar = 0x1F97; break;
                case 0x1FA8 : mapChar = 0x1FA0; break;
                case 0x1FA9 : mapChar = 0x1FA1; break;
                case 0x1FAA : mapChar = 0x1FA2; break;
                case 0x1FAB : mapChar = 0x1FA3; break;
                case 0x1FAC : mapChar = 0x1FA4; break;
                case 0x1FAD : mapChar = 0x1FA5; break;
                case 0x1FAE : mapChar = 0x1FA6; break;
                case 0x1FAF : mapChar = 0x1FA7; break;
                case 0x1FBC : mapChar = 0x1FB3; break;
                case 0x1FCC : mapChar = 0x1FC3; break;
                case 0x1FFC : mapChar = 0x1FF3; break;

                    // mappings whose distance does not fit the offset field
                case 0x023A : mapChar = 0x2C65; break;
                case 0x023E : mapChar = 0x2C66; break;
                case 0x10A0 : mapChar = 0x2D00; break;
                case 0x10A1 : mapChar = 0x2D01; break;
                case 0x10A2 : mapChar = 0x2D02; break;
                case 0x10A3 : mapChar = 0x2D03; break;
                case 0x10A4 : mapChar = 0x2D04; break;
                case 0x10A5 : mapChar = 0x2D05; break;
                case 0x10A6 : mapChar = 0x2D06; break;
                case 0x10A7 : mapChar = 0x2D07; break;
                case 0x10A8 : mapChar = 0x2D08; break;
                case 0x10A9 : mapChar = 0x2D09; break;
                case 0x10AA : mapChar = 0x2D0A; break;
                case 0x10AB : mapChar = 0x2D0B; break;
                case 0x10AC : mapChar = 0x2D0C; break;
                case 0x10AD : mapChar = 0x2D0D; break;
                case 0x10AE : mapChar = 0x2D0E; break;
                case 0x10AF : mapChar = 0x2D0F; break;
                case 0x10B0 : mapChar = 0x2D10; break;
                case 0x10B1 : mapChar = 0x2D11; break;
                case 0x10B2 : mapChar = 0x2D12; break;
                case 0x10B3 : mapChar = 0x2D13; break;
                case 0x10B4 : mapChar = 0x2D14; break;
                case 0x10B5 : mapChar = 0x2D15; break;
                case 0x10B6 : mapChar = 0x2D16; break;
                case 0x10B7 : mapChar = 0x2D17; break;
                case 0x10B8 : mapChar = 0x2D18; break;
                case 0x10B9 : mapChar = 0x2D19; break;
                case 0x10BA : mapChar = 0x2D1A; break;
                case 0x10BB : mapChar = 0x2D1B; break;
                case 0x10BC : mapChar = 0x2D1C; break;
                case 0x10BD : mapChar = 0x2D1D; break;
                case 0x10BE : mapChar = 0x2D1E; break;
                case 0x10BF : mapChar = 0x2D1F; break;
                case 0x10C0 : mapChar = 0x2D20; break;
                case 0x10C1 : mapChar = 0x2D21; break;
                case 0x10C2 : mapChar = 0x2D22; break;
                case 0x10C3 : mapChar = 0x2D23; break;
                case 0x10C4 : mapChar = 0x2D24; break;
                case 0x10C5 : mapChar = 0x2D25; break;
                case 0x10C7 : mapChar = 0x2D27; break;
                case 0x10CD : mapChar = 0x2D2D; break;
                case 0x1E9E : mapChar = 0x00DF; break;
                case 0x2C62 : mapChar = 0x026B; break;
                case 0x2C63 : mapChar = 0x1D7D; break;
                case 0x2C64 : mapChar = 0x027D; break;
                case 0x2C6D : mapChar = 0x0251; break;
                case 0x2C6E : mapChar = 0x0271; break;
                case 0x2C6F : mapChar = 0x0250; break;
                case 0x2C70 : mapChar = 0x0252; break;
                case 0x2C7E : mapChar = 0x023F; break;
                case 0x2C7F : mapChar = 0x0240; break;
                case 0xA77D : mapChar = 0x1D79; break;
                case 0xA78D : mapChar = 0x0265; break;
                case 0xA7AA : mapChar = 0x0266; break;
                    // default mapChar is already set, so no
                    // need to redo it here.
                    // default       : mapChar = ch;
                }
            }
            else {
                // Shift the offset field up so that its sign bit lands in
                // bit 31, then arithmetic-shift back down: this sign-extends
                // the signed case offset.
                int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
                mapChar = ch + offset;
            }
        }
        return mapChar;
    }

    /**
     * Maps the character to its single-character uppercase equivalent.
     *
     * <p>If the "has uppercase" bit is clear, the argument is returned
     * unchanged. Otherwise the signed case offset stored in the property word
     * is subtracted from the character. When that offset field has every bit
     * set, the result is taken from the explicit table below; a character
     * that is not listed there is returned unchanged (per the comment at the
     * end of the table, such a character has a 1:M mapping that cannot be
     * expressed as a single {@code int}). Compare
     * {@link #toUpperCaseEx(int)}, which reports that case instead.
     *
     * @param ch the character (code point) to convert
     * @return the uppercase mapping, or {@code ch} itself if there is none
     */
    int toUpperCase(int ch) {
        int mapChar = ch;
        int val = getProperties(ch);

        if ((val & $$maskUpperCase) != 0) {
            // An all-ones offset field flags a mapping that is handled by the
            // explicit table instead of by offset arithmetic.
            if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
                switch(ch) {
                    // map chars with overflow offsets
                case 0x00B5 : mapChar = 0x039C; break;
                case 0x017F : mapChar = 0x0053; break;
                case 0x1FBE : mapChar = 0x0399; break;
                    // map char that have both a 1:1 and 1:M map
                case 0x1F80 : mapChar = 0x1F88; break;
                case 0x1F81 : mapChar = 0x1F89; break;
                case 0x1F82 : mapChar = 0x1F8A; break;
                case 0x1F83 : mapChar = 0x1F8B; break;
                case 0x1F84 : mapChar = 0x1F8C; break;
                case 0x1F85 : mapChar = 0x1F8D; break;
                case 0x1F86 : mapChar = 0x1F8E; break;
                case 0x1F87 : mapChar = 0x1F8F; break;
                case 0x1F90 : mapChar = 0x1F98; break;
                case 0x1F91 : mapChar = 0x1F99; break;
                case 0x1F92 : mapChar = 0x1F9A; break;
                case 0x1F93 : mapChar = 0x1F9B; break;
                case 0x1F94 : mapChar = 0x1F9C; break;
                case 0x1F95 : mapChar = 0x1F9D; break;
                case 0x1F96 : mapChar = 0x1F9E; break;
                case 0x1F97 : mapChar = 0x1F9F; break;
                case 0x1FA0 : mapChar = 0x1FA8; break;
                case 0x1FA1 : mapChar = 0x1FA9; break;
                case 0x1FA2 : mapChar = 0x1FAA; break;
                case 0x1FA3 : mapChar = 0x1FAB; break;
                case 0x1FA4 : mapChar = 0x1FAC; break;
                case 0x1FA5 : mapChar = 0x1FAD; break;
                case 0x1FA6 : mapChar = 0x1FAE; break;
                case 0x1FA7 : mapChar = 0x1FAF; break;
                case 0x1FB3 : mapChar = 0x1FBC; break;
                case 0x1FC3 : mapChar = 0x1FCC; break;
                case 0x1FF3 : mapChar = 0x1FFC; break;

                    // mappings whose distance does not fit the offset field
                case 0x023F : mapChar = 0x2C7E; break;
                case 0x0240 : mapChar = 0x2C7F; break;
                case 0x0250 : mapChar = 0x2C6F; break;
                case 0x0251 : mapChar = 0x2C6D; break;
                case 0x0252 : mapChar = 0x2C70; break;
                case 0x0265 : mapChar = 0xA78D; break;
                case 0x0266 : mapChar = 0xA7AA; break;
                case 0x026B : mapChar = 0x2C62; break;
                case 0x0271 : mapChar = 0x2C6E; break;
                case 0x027D : mapChar = 0x2C64; break;
                case 0x1D79 : mapChar = 0xA77D; break;
                case 0x1D7D : mapChar = 0x2C63; break;
                case 0x2C65 : mapChar = 0x023A; break;
                case 0x2C66 : mapChar = 0x023E; break;
                case 0x2D00 : mapChar = 0x10A0; break;
                case 0x2D01 : mapChar = 0x10A1; break;
                case 0x2D02 : mapChar = 0x10A2; break;
                case 0x2D03 : mapChar = 0x10A3; break;
                case 0x2D04 : mapChar = 0x10A4; break;
                case 0x2D05 : mapChar = 0x10A5; break;
                case 0x2D06 : mapChar = 0x10A6; break;
                case 0x2D07 : mapChar = 0x10A7; break;
                case 0x2D08 : mapChar = 0x10A8; break;
                case 0x2D09 : mapChar = 0x10A9; break;
                case 0x2D0A : mapChar = 0x10AA; break;
                case 0x2D0B : mapChar = 0x10AB; break;
                case 0x2D0C : mapChar = 0x10AC; break;
                case 0x2D0D : mapChar = 0x10AD; break;
                case 0x2D0E : mapChar = 0x10AE; break;
                case 0x2D0F : mapChar = 0x10AF; break;
                case 0x2D10 : mapChar = 0x10B0; break;
                case 0x2D11 : mapChar = 0x10B1; break;
                case 0x2D12 : mapChar = 0x10B2; break;
                case 0x2D13 : mapChar = 0x10B3; break;
                case 0x2D14 : mapChar = 0x10B4; break;
                case 0x2D15 : mapChar = 0x10B5; break;
                case 0x2D16 : mapChar = 0x10B6; break;
                case 0x2D17 : mapChar = 0x10B7; break;
                case 0x2D18 : mapChar = 0x10B8; break;
                case 0x2D19 : mapChar = 0x10B9; break;
                case 0x2D1A : mapChar = 0x10BA; break;
                case 0x2D1B : mapChar = 0x10BB; break;
                case 0x2D1C : mapChar = 0x10BC; break;
                case 0x2D1D : mapChar = 0x10BD; break;
                case 0x2D1E : mapChar = 0x10BE; break;
                case 0x2D1F : mapChar = 0x10BF; break;
                case 0x2D20 : mapChar = 0x10C0; break;
                case 0x2D21 : mapChar = 0x10C1; break;
                case 0x2D22 : mapChar = 0x10C2; break;
                case 0x2D23 : mapChar = 0x10C3; break;
                case 0x2D24 : mapChar = 0x10C4; break;
                case 0x2D25 : mapChar = 0x10C5; break;
                case 0x2D27 : mapChar = 0x10C7; break;
                case 0x2D2D : mapChar = 0x10CD; break;
                    // ch must have a 1:M case mapping, but we
                    // can't handle it here. Return ch.
                    // since mapChar is already set, no need
                    // to redo it here.
                    //default       : mapChar = ch;
                }
            }
            else {
                // Sign-extend the case offset field (left shift to bit 31,
                // arithmetic shift back), then subtract it.
                int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
                mapChar = ch - offset;
            }
        }
        return mapChar;
    }

    /**
     * Maps the character to its titlecase equivalent.
     *
     * <p>When the "has titlecase" bit is set, the titlecase form is taken to
     * be adjacent to the character: {@code ch + 1} if the character has no
     * uppercase mapping, {@code ch - 1} if it has no lowercase mapping, and
     * {@code ch} itself if it has both. When the bit is clear but an
     * uppercase mapping exists, the result of {@link #toUpperCase(int)} is
     * used. Otherwise the argument is returned unchanged.
     *
     * @param ch the character (code point) to convert
     * @return the titlecase mapping, or {@code ch} itself if there is none
     */
    int toTitleCase(int ch) {
        int mapChar = ch;
        int val = getProperties(ch);

        if ((val & $$maskTitleCase) != 0) {
            // There is a titlecase equivalent.  Perform further checks:
            if ((val & $$maskUpperCase) == 0) {
                // The character does not have an uppercase equivalent, so it must
                // already be uppercase; so add 1 to get the titlecase form.
                mapChar = ch + 1;
            }
            else if ((val & $$maskLowerCase) == 0) {
                // The character does not have a lowercase equivalent, so it must
                // already be lowercase; so subtract 1 to get the titlecase form.
                mapChar = ch - 1;
            }
            // else {
            // The character has both an uppercase equivalent and a lowercase
            // equivalent, so it must itself be a titlecase form; return it.
            // return ch;
            //}
        }
        else if ((val & $$maskUpperCase) != 0) {
            // This character has no titlecase equivalent but it does have an
            // uppercase equivalent, so use that (subtract the signed case offset).
            mapChar = toUpperCase(ch);
        }
        return mapChar;
    }

    /**
     * Returns the numeric value of the character in the given radix.
     *
     * <p>A value is computed only when the radix lies within
     * {@code Character.MIN_RADIX} and {@code Character.MAX_RADIX} and the
     * character is either of type {@code DECIMAL_DIGIT_NUMBER} or a Java
     * supradecimal digit (for which 10 is added to the masked result). The
     * value is then returned only if it is smaller than the radix.
     *
     * @param ch    the character (code point) to convert
     * @param radix the radix
     * @return the digit value, or -1 if the radix is out of range, the
     *         character is not a digit of either kind, or its value is not
     *         below {@code radix}
     */
    int digit(int ch, int radix) {
        int value = -1;
        if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
            int val = getProperties(ch);
            int kind = val & $$maskType;
            if (kind == Character.DECIMAL_DIGIT_NUMBER) {
                // '+' binds tighter than '&': the digit offset is added to
                // ch first and the sum is then masked.
                value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
            }
            else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
                // Java supradecimal digit
                value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
            }
        }
        // value is still -1 here if nothing matched, and -1 < radix holds for
        // every radix, so the "not a digit" case also yields -1.
        return (value < radix) ? value : -1;
    }

    /**
     * Returns the numeric value that the character represents.
     *
     * <p>The numeric-type field of the property word selects one of four
     * strategies: not numeric, simple digit (offset plus mask), Java
     * supradecimal (same, plus 10), or "strange" numeric, which is resolved
     * through the explicit table below.
     *
     * @param ch the character (code point) to convert
     * @return the numeric value; -1 if the character has no numeric
     *         property; -2 if it is flagged as a "strange" numeric but is not
     *         listed in the table
     */
    int getNumericValue(int ch) {
        int val = getProperties(ch);
        int retval = -1;

        switch (val & $$maskNumericType) {
        default: // cannot occur
        case ($$valueNotNumeric):         // not numeric
            retval = -1;
            break;
        case ($$valueDigit):              // simple numeric
            // '+' binds tighter than '&': add the digit offset, then mask.
            retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
            break;
        case ($$valueStrangeNumeric)      :       // "strange" numeric
            switch (ch) {
                case 0x0BF1: retval = 100; break;         // TAMIL NUMBER ONE HUNDRED
                case 0x0BF2: retval = 1000; break;        // TAMIL NUMBER ONE THOUSAND
                case 0x1375: retval = 40; break;          // ETHIOPIC NUMBER FORTY
                case 0x1376: retval = 50; break;          // ETHIOPIC NUMBER FIFTY
                case 0x1377: retval = 60; break;          // ETHIOPIC NUMBER SIXTY
                case 0x1378: retval = 70; break;          // ETHIOPIC NUMBER SEVENTY
                case 0x1379: retval = 80; break;          // ETHIOPIC NUMBER EIGHTY
                case 0x137A: retval = 90; break;          // ETHIOPIC NUMBER NINETY
                case 0x137B: retval = 100; break;         // ETHIOPIC NUMBER HUNDRED
                case 0x137C: retval = 10000; break;       // ETHIOPIC NUMBER TEN THOUSAND
                case 0x215F: retval = 1; break;           // FRACTION NUMERATOR ONE
                case 0x216C: retval = 50; break;          // ROMAN NUMERAL FIFTY
                case 0x216D: retval = 100; break;         // ROMAN NUMERAL ONE HUNDRED
                case 0x216E: retval = 500; break;         // ROMAN NUMERAL FIVE HUNDRED
                case 0x216F: retval = 1000; break;        // ROMAN NUMERAL ONE THOUSAND
                case 0x217C: retval = 50; break;          // SMALL ROMAN NUMERAL FIFTY
                case 0x217D: retval = 100; break;         // SMALL ROMAN NUMERAL ONE HUNDRED
                case 0x217E: retval = 500; break;         // SMALL ROMAN NUMERAL FIVE HUNDRED
                case 0x217F: retval = 1000; break;        // SMALL ROMAN NUMERAL ONE THOUSAND
                case 0x2180: retval = 1000; break;        // ROMAN NUMERAL ONE THOUSAND C D
                case 0x2181: retval = 5000; break;        // ROMAN NUMERAL FIVE THOUSAND
                case 0x2182: retval = 10000; break;       // ROMAN NUMERAL TEN THOUSAND

                case 0x324B: retval = 40; break;          // CIRCLED NUMBER FORTY ON BLACK SQUARE
                case 0x324C: retval = 50; break;          // CIRCLED NUMBER FIFTY ON BLACK SQUARE
                case 0x324D: retval = 60; break;          // CIRCLED NUMBER SIXTY ON BLACK SQUARE
                case 0x324E: retval = 70; break;          // CIRCLED NUMBER SEVENTY ON BLACK SQUARE
                case 0x324F: retval = 80; break;          // CIRCLED NUMBER EIGHTY ON BLACK SQUARE
                case 0x325C: retval = 32; break;          // CIRCLED NUMBER THIRTY TWO

                case 0x325D: retval = 33; break;          // CIRCLED NUMBER THIRTY THREE
                case 0x325E: retval = 34; break;          // CIRCLED NUMBER THIRTY FOUR
                case 0x325F: retval = 35; break;          // CIRCLED NUMBER THIRTY FIVE
                case 0x32B1: retval = 36; break;          // CIRCLED NUMBER THIRTY SIX
                case 0x32B2: retval = 37; break;          // CIRCLED NUMBER THIRTY SEVEN
                case 0x32B3: retval = 38; break;          // CIRCLED NUMBER THIRTY EIGHT
                case 0x32B4: retval = 39; break;          // CIRCLED NUMBER THIRTY NINE
                case 0x32B5: retval = 40; break;          // CIRCLED NUMBER FORTY
                case 0x32B6: retval = 41; break;          // CIRCLED NUMBER FORTY ONE
                case 0x32B7: retval = 42; break;          // CIRCLED NUMBER FORTY TWO
                case 0x32B8: retval = 43; break;          // CIRCLED NUMBER FORTY THREE
                case 0x32B9: retval = 44; break;          // CIRCLED NUMBER FORTY FOUR
                case 0x32BA: retval = 45; break;          // CIRCLED NUMBER FORTY FIVE
                case 0x32BB: retval = 46; break;          // CIRCLED NUMBER FORTY SIX
                case 0x32BC: retval = 47; break;          // CIRCLED NUMBER FORTY SEVEN
                case 0x32BD: retval = 48; break;          // CIRCLED NUMBER FORTY EIGHT
                case 0x32BE: retval = 49; break;          // CIRCLED NUMBER FORTY NINE
                case 0x32BF: retval = 50; break;          // CIRCLED NUMBER FIFTY

                case 0x0D71: retval = 100; break;         // MALAYALAM NUMBER ONE HUNDRED
                case 0x0D72: retval = 1000; break;        // MALAYALAM NUMBER ONE THOUSAND
                case 0x2186: retval = 50; break;          // ROMAN NUMERAL FIFTY EARLY FORM
                case 0x2187: retval = 50000; break;       // ROMAN NUMERAL FIFTY THOUSAND
                case 0x2188: retval = 100000; break;      // ROMAN NUMERAL ONE HUNDRED THOUSAND

                // flagged as "strange" but not listed above
                default:       retval = -2; break;
            }
            break;
        case ($$valueJavaSupradecimal):           // Java supradecimal
            retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
            break;
        }
        return retval;
    }

    /**
     * Tests whether the character is Java whitespace.
     *
     * <p>Requires the identifier-info field to equal the "Java whitespace"
     * value (4 according to the layout comment at the top of this class).
     *
     * @param ch the character (code point) to test
     * @return {@code true} on an exact match of the identifier-info field
     */
    boolean isWhitespace(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
    }

    /**
     * Returns the bidirectional category of the character.
     *
     * <p>The directionality field is returned as stored, except for the value
     * {@code 0xF}, which acts as an escape: five specific characters
     * (U+202A through U+202E) are then mapped to their embedding, override
     * and pop directional constants, and every other character yields
     * {@code Character.DIRECTIONALITY_UNDEFINED}.
     *
     * @param ch the character (code point) to test
     * @return one of the {@code Character.DIRECTIONALITY_*} values
     */
    byte getDirectionality(int ch) {
        int val = getProperties(ch);
        byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
        if (directionality == 0xF ) {
            switch(ch) {
                case 0x202A :
                    // This is the only char with LRE
                    directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING;
                    break;
                case 0x202B :
                    // This is the only char with RLE
                    directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING;
                    break;
                case 0x202C :
                    // This is the only char with PDF
                    directionality = Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT;
                    break;
                case 0x202D :
                    // This is the only char with LRO
                    directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE;
                    break;
                case 0x202E :
                    // This is the only char with RLO
                    directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE;
                    break;
                default :
                    directionality = Character.DIRECTIONALITY_UNDEFINED;
                    break;
            }
        }
        return directionality;
    }

    /**
     * Tests the "mirrored" bit of the property word.
     *
     * @param ch the character (code point) to test
     * @return {@code true} if the bit is set
     */
    boolean isMirrored(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskMirrored) != 0);
    }

    /**
     * Maps the character to its uppercase equivalent, reporting characters
     * that cannot be mapped to a single value.
     *
     * <p>Behaves like {@link #toUpperCase(int)} for characters whose mapping
     * is expressed as an offset. For characters routed to the explicit table
     * (offset field all ones), a listed character returns its mapping and any
     * unlisted character returns {@code Character.ERROR} rather than
     * {@code ch}. The table here omits the U+1F80..U+1FF3 entries that
     * {@code toUpperCase} lists under "both a 1:1 and 1:M map", so those
     * characters yield {@code Character.ERROR} as well.
     *
     * @param ch the character (code point) to convert
     * @return the uppercase mapping, {@code ch} itself if the character has
     *         no uppercase mapping, or {@code Character.ERROR} as described
     *         above
     */
    int toUpperCaseEx(int ch) {
        int mapChar = ch;
        int val = getProperties(ch);

        if ((val & $$maskUpperCase) != 0) {
            if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
                // Ordinary case: sign-extend the case offset field and
                // subtract it.
                int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
                mapChar = ch - offset;
            }
            else {
                switch(ch) {
                    // map overflow characters
                    case 0x00B5 : mapChar = 0x039C; break;
                    case 0x017F : mapChar = 0x0053; break;
                    case 0x1FBE : mapChar = 0x0399; break;

                    case 0x023F : mapChar = 0x2C7E; break;
                    case 0x0240 : mapChar = 0x2C7F; break;
                    case 0x0250 : mapChar = 0x2C6F; break;
                    case 0x0251 : mapChar = 0x2C6D; break;
                    case 0x0252 : mapChar = 0x2C70; break;
                    case 0x0265 : mapChar = 0xA78D; break;
                    case 0x0266 : mapChar = 0xA7AA; break;
                    case 0x026B : mapChar = 0x2C62; break;
                    case 0x0271 : mapChar = 0x2C6E; break;
                    case 0x027D : mapChar = 0x2C64; break;
                    case 0x1D79 : mapChar = 0xA77D; break;
                    case 0x1D7D : mapChar = 0x2C63; break;
                    case 0x2C65 : mapChar = 0x023A; break;
                    case 0x2C66 : mapChar = 0x023E; break;
                    case 0x2D00 : mapChar = 0x10A0; break;
                    case 0x2D01 : mapChar = 0x10A1; break;
                    case 0x2D02 : mapChar = 0x10A2; break;
                    case 0x2D03 : mapChar = 0x10A3; break;
                    case 0x2D04 : mapChar = 0x10A4; break;
                    case 0x2D05 : mapChar = 0x10A5; break;
                    case 0x2D06 : mapChar = 0x10A6; break;
                    case 0x2D07 : mapChar = 0x10A7; break;
                    case 0x2D08 : mapChar = 0x10A8; break;
                    case 0x2D09 : mapChar = 0x10A9; break;
                    case 0x2D0A : mapChar = 0x10AA; break;
                    case 0x2D0B : mapChar = 0x10AB; break;
                    case 0x2D0C : mapChar = 0x10AC; break;
                    case 0x2D0D : mapChar = 0x10AD; break;
                    case 0x2D0E : mapChar = 0x10AE; break;
                    case 0x2D0F : mapChar = 0x10AF; break;
                    case 0x2D10 : mapChar = 0x10B0; break;
                    case 0x2D11 : mapChar = 0x10B1; break;
                    case 0x2D12 : mapChar = 0x10B2; break;
                    case 0x2D13 : mapChar = 0x10B3; break;
                    case 0x2D14 : mapChar = 0x10B4; break;
                    case 0x2D15 : mapChar = 0x10B5; break;
                    case 0x2D16 : mapChar = 0x10B6; break;
                    case 0x2D17 : mapChar = 0x10B7; break;
                    case 0x2D18 : mapChar = 0x10B8; break;
                    case 0x2D19 : mapChar = 0x10B9; break;
                    case 0x2D1A : mapChar = 0x10BA; break;
                    case 0x2D1B : mapChar = 0x10BB; break;
                    case 0x2D1C : mapChar = 0x10BC; break;
                    case 0x2D1D : mapChar = 0x10BD; break;
                    case 0x2D1E : mapChar = 0x10BE; break;
                    case 0x2D1F : mapChar = 0x10BF; break;
                    case 0x2D20 : mapChar = 0x10C0; break;
                    case 0x2D21 : mapChar = 0x10C1; break;
                    case 0x2D22 : mapChar = 0x10C2; break;
                    case 0x2D23 : mapChar = 0x10C3; break;
                    case 0x2D24 : mapChar = 0x10C4; break;
                    case 0x2D25 : mapChar = 0x10C5; break;
                    case 0x2D27 : mapChar = 0x10C7; break;
                    case 0x2D2D : mapChar = 0x10CD; break;
                    // not representable as a single value; let the caller know
                    default       : mapChar = Character.ERROR; break;
                }
            }
        }
        return mapChar;
    }

    /**
     * Returns the uppercase form of the character as a {@code char} array.
     *
     * <p>If the character is found in {@code charMap}, the second element of
     * its entry is returned directly (the table's own array, not a copy).
     * Otherwise a new one-element array holding {@code (char)ch} is returned.
     *
     * @param ch the character (code point) to convert
     * @return the mapped characters, or a single-element array containing
     *         the argument narrowed to {@code char}
     */
    char[] toUpperCaseCharArray(int ch) {
        char[] upperMap = {(char)ch};
        int location = findInCharMap(ch);
        if (location != -1) {
            upperMap = charMap[location][1];
        }
        return upperMap;
    }


    /**
     * Finds the character in the uppercase mapping table.
     *
     * <p>Performs a binary search that compares {@code ch} with the first
     * character of the first element of each {@code charMap} entry.
     * NOTE(review): this presumes the table is sorted in ascending order of
     * that key -- the table itself is not visible here; confirm.
     *
     * @param ch the <code>char</code> to search
     * @return the index location ch in the table or -1 if not found
     * @since 1.4
     */
     int findInCharMap(int ch) {
        if (charMap == null || charMap.length == 0) {
            return -1;
        }
        int top, bottom, current;
        bottom = 0;
        top = charMap.length;
        current = top/2;
        // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0]
        while (top - bottom > 1) {
            if (ch >= charMap[current][0][0]) {
                bottom = current;
            } else {
                top = current;
            }
            current = (top + bottom) / 2;
        }
        // The loop narrows the range to a single candidate; accept it only on
        // an exact key match.
        if (ch == charMap[current][0][0]) return current;
        else return -1;
    }

    // The single shared instance; the private constructor below prevents any
    // other instantiation.
    static final CharacterData00 instance = new CharacterData00();
    private CharacterData00() {};

    $$Tables

    static {
        $$Initializers
    }
}