/*
 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.openjdk.buildtools.generatecharacter;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * The UnicodeSpec class provides a way to read in Unicode character
 * properties from a Unicode data file. One instance of class UnicodeSpec
 * holds a decoded version of one line of the data file. The file may
 * be obtained from www.unicode.org. The method readSpecFile returns an array
 * of UnicodeSpec objects.
 *
 * @author Guy Steele
 * @author John O'Conner
 */
public class UnicodeSpec {

    /** Sentinel stored in a case-mapping field when no mapping is defined. */
    private static final int MAP_UNDEFINED = 0xFFFFFFFF;

    /**
     * Construct a default UnicodeSpec object, with a default
     * code point value 0xFFFF.
     */
    public UnicodeSpec() {
        this(0xffff);
    }

    /**
     * Construct a UnicodeSpec object for the given {@code codePoint}
     * argument. Provide default properties.
     *
     * @param codePoint a Unicode code point between 0x0000 and 0x10FFFF
     */
    public UnicodeSpec(int codePoint) {
        this.codePoint = codePoint;
        generalCategory = UNASSIGNED;
        bidiCategory = DIRECTIONALITY_UNDEFINED;
        mirrored = false;
        titleMap = MAP_UNDEFINED;
        upperMap = MAP_UNDEFINED;
        lowerMap = MAP_UNDEFINED;
        decimalValue = -1;
        digitValue = -1;
        numericValue = "";
        oldName = null;
        comment = null;
        name = null;
    }

    /**
     * Create a String representation of this UnicodeSpec object.
     * The string contains the code point as six hexadecimal digits,
     * followed by each case mapping that is defined.
     *
     * @return the code point and its defined case mappings
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(hex6(codePoint));
        if (hasUpperMap()) {
            result.append(", upper=").append(hex6(upperMap));
        }
        if (hasLowerMap()) {
            result.append(", lower=").append(hex6(lowerMap));
        }
        if (hasTitleMap()) {
            result.append(", title=").append(hex6(titleMap));
        }
        return result.toString();
    }

    /**
     * Format the low 16 bits of {@code n} as upper-case hexadecimal,
     * left-padded with zeros to four digits.
     */
    static String hex4(int n) {
        String q = Integer.toHexString(n & 0xFFFF).toUpperCase();
        return "0000".substring(Math.min(4, q.length())) + q;
    }

    /**
     * Format the low 24 bits of {@code n} as upper-case hexadecimal,
     * left-padded with zeros to six digits.
     */
    static String hex6(int n) {
        String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
        return "000000".substring(Math.min(6, str.length())) + str;
    }

    /**
     * Given one line of a Unicode data file as a String, parse the line
     * and return a UnicodeSpec object that contains the same character information.
     * Any failure is reported with a message on standard output.
     *
     * @param s a line of the Unicode data file to be parsed
     * @return a UnicodeSpec object, or null if the parsing process failed for some reason
     */
    public static UnicodeSpec parse(String s) {
        UnicodeSpec spec;
        try {
            // A positive limit keeps trailing empty fields, so a well-formed
            // line always yields REQUIRED_FIELDS tokens.
            String[] tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
            spec = new UnicodeSpec();
            spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
            spec.setName(parseName(tokens[FIELD_NAME]));
            spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
            spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
            spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
            spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
            spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
            spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
            spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
            spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
            spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
            spec.setComment(parseComment(tokens[FIELD_COMMENT]));
            spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
            spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
            spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
        } catch (Exception e) {
            // Deliberately broad: a short line (missing tokens), a malformed
            // number or an unknown category all mean "not a usable line".
            spec = null;
            System.out.println("Error parsing spec line.");
        }
        return spec;
    }

    /**
     * Parse the codePoint attribute for a Unicode character.
     *
     * The codePoint attribute should be a four to six digit hexadecimal integer.
     *
     * @param s the codePoint attribute extracted from a line of the Unicode data file
     * @return code point if successful
     * @exception NumberFormatException if unable to parse argument
     */
    public static int parseCodePoint(String s) throws NumberFormatException {
        return Integer.parseInt(s, 16);
    }

    /**
     * Parse the name attribute; the text is used as is.
     *
     * @param s the name attribute extracted from a line of the Unicode data file
     * @return the name, unchanged
     * @exception Exception if the attribute is null
     */
    public static String parseName(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse name.");
        }
        return s;
    }

    /**
     * Parse the general category attribute by looking up its short name
     * (for example "Lu") in {@link #generalCategoryList}.
     *
     * @param s the general category attribute extracted from a line of the Unicode data file
     * @return the matching general category constant
     * @exception Exception if the short name is not in the list
     */
    public static byte parseGeneralCategory(String s) throws Exception {
        return lookupShortName(generalCategoryList, s, "Could not parse general category.");
    }

    /**
     * Parse the bidirectional category attribute by looking up its short name
     * (for example "AL") in {@link #bidiCategoryList}.
     *
     * @param s the bidi category attribute extracted from a line of the Unicode data file
     * @return the matching directionality constant
     * @exception Exception if the short name is not in the list
     */
    public static byte parseBidiCategory(String s) throws Exception {
        return lookupShortName(bidiCategoryList, s, "Could not parse bidi category.");
    }

    /**
     * Return the index of the row of {@code table} whose SHORT name equals
     * {@code s}, or throw an Exception carrying {@code errorMessage}.
     */
    private static byte lookupShortName(String[][] table, String s, String errorMessage)
            throws Exception {
        for (byte i = 0; i < table.length; i++) {
            if (s.equals(table[i][SHORT])) {
                return i;
            }
        }
        throw new Exception(errorMessage);
    }

    /**
     * Parse the combining class attribute for a Unicode character.
     *
     * The combining attribute, if any, should be a nonnegative decimal integer.
     *
     * @param s the combining attribute extracted from a line of the Unicode data file
     * @return the combining class value if any, -1 if property not defined
     * @exception Exception if can't parse the combining class
     */
    public static int parseCombiningClass(String s) throws Exception {
        return parseOptionalDecimal(s);
    }

    /**
     * Parse the decomposition attribute for a Unicode character.
     *
     * The decomposition attribute is complicated; for now, it is treated as a string.
     *
     * @param s the decomposition attribute extracted from a line of the Unicode data file
     * @return the decomposition, unchanged
     * @exception Exception if the attribute is null
     */
    public static String parseDecomposition(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse decomposition.");
        }
        return s;
    }

    /**
     * Parse the decimal value attribute for a Unicode character.
     *
     * The decimal value attribute, if any, should be a nonnegative decimal integer.
     *
     * @param s the decimal value attribute extracted from a line of the Unicode data file
     * @return the decimal value as an int, -1 if no decimal value defined
     * @exception NumberFormatException if the parse fails
     */
    public static int parseDecimalValue(String s) throws NumberFormatException {
        return parseOptionalDecimal(s);
    }

    /**
     * Parse the digit value attribute for a Unicode character.
     *
     * The digit value attribute, if any, should be a nonnegative decimal integer.
     *
     * @param s the digit value attribute extracted from a line of the Unicode data file
     * @return the digit value as an non-negative int, or -1 if no digit property defined
     * @exception NumberFormatException if the parse fails
     */
    public static int parseDigitValue(String s) throws NumberFormatException {
        return parseOptionalDecimal(s);
    }

    /**
     * Parse an optional base-10 field: an empty string means "not defined"
     * and yields -1, anything else is handed to Integer.parseInt.
     */
    private static int parseOptionalDecimal(String s) throws NumberFormatException {
        return s.isEmpty() ? -1 : Integer.parseInt(s, 10);
    }

    /**
     * Parse the numeric value attribute; the text is used as is.
     *
     * @param s the numeric value attribute extracted from a line of the Unicode data file
     * @return the numeric value text, unchanged
     * @exception Exception if the attribute is null
     */
    public static String parseNumericValue(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse numeric value.");
        }
        return s;
    }

    /**
     * Parse the comment attribute; the text is used as is.
     *
     * @param s the comment attribute extracted from a line of the Unicode data file
     * @return the comment, unchanged
     * @exception Exception if the attribute is null
     */
    public static String parseComment(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse comment.");
        }
        return s;
    }

    /**
     * Parse the mirrored attribute, which must be exactly "Y" or "N".
     *
     * @param s the mirrored attribute extracted from a line of the Unicode data file
     * @return true for "Y", false for "N"
     * @exception Exception if the attribute is anything else
     */
    public static boolean parseMirrored(String s) throws Exception {
        if ("Y".equals(s)) {
            return true;
        }
        if ("N".equals(s)) {
            return false;
        }
        throw new Exception("Cannot parse mirrored property.");
    }

    /**
     * Parse the old name attribute; the text is used as is.
     *
     * @param s the old name attribute extracted from a line of the Unicode data file
     * @return the old name, unchanged
     * @exception Exception if the attribute is null
     */
    public static String parseOldName(String s) throws Exception {
        if (s == null) {
            throw new Exception("Cannot parse old name");
        }
        return s;
    }

    /**
     * Parse the uppercase mapping attribute for a Unicode character.
     *
     * The uppercase mapping attribute should be a four to six digit hexadecimal integer.
     *
     * @param s the uppercase mapping attribute extracted from a line of the Unicode data file
     * @return simple uppercase character mapping if defined, MAP_UNDEFINED otherwise
     * @exception NumberFormatException if parse fails
     */
    public static int parseUpperMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /**
     * Parse the lowercase mapping attribute for a Unicode character.
     *
     * The lowercase mapping attribute should be a four to six digit hexadecimal integer.
     *
     * @param s the lowercase mapping attribute extracted from a line of the Unicode data file
     * @return simple lowercase character mapping if defined, MAP_UNDEFINED otherwise
     * @exception NumberFormatException if parse fails
     */
    public static int parseLowerMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /**
     * Parse the titlecase mapping attribute for a Unicode character.
     *
     * The titlecase mapping attribute should be a four to six digit hexadecimal integer.
     *
     * @param s the titlecase mapping attribute extracted from a line of the Unicode data file
     * @return simple title case char mapping if defined, MAP_UNDEFINED otherwise
     * @exception NumberFormatException if parse fails
     */
    public static int parseTitleMap(String s) throws NumberFormatException {
        return parseCaseMap(s);
    }

    /**
     * Parse one simple case-mapping field: empty means MAP_UNDEFINED, four to
     * six characters are read as hexadecimal, any other length is rejected.
     */
    private static int parseCaseMap(String s) throws NumberFormatException {
        int length = s.length();
        if (length == 0) {
            return MAP_UNDEFINED;
        }
        if (length < 4 || length > 6) {
            throw new NumberFormatException(
                    "Case mapping must be 4 to 6 hexadecimal digits: \"" + s + "\"");
        }
        return Integer.parseInt(s, 16);
    }

    /**
     * Read and parse a Unicode data file, keeping only the entries of one plane.
     * Lines that cannot be parsed are skipped. Reading stops at the first entry
     * whose plane is greater than {@code plane}, so the file is expected to be
     * ordered by code point. If reading fails part-way, the failure is reported
     * on standard error and the entries collected so far are returned.
     *
     * @param file a file specifying the Unicode data file to be read
     * @param plane the plane (code point {@code >>> 16}) whose entries are wanted
     * @return an array of UnicodeSpec objects, one for each line of the
     *         Unicode data file that could be successfully parsed as
     *         specifying Unicode character attributes in the given plane
     * @exception FileNotFoundException if the file cannot be opened for reading
     */
    public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
        List<UnicodeSpec> specs = new ArrayList<>(3000);
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                UnicodeSpec item = parse(line.trim());
                if (item == null) {
                    // parse() has already reported the bad line; it must be
                    // skipped before the code point is inspected.
                    continue;
                }
                int specPlane = item.getCodePoint() >>> 16;
                if (specPlane < plane) {
                    continue;
                }
                if (specPlane > plane) {
                    break;
                }
                specs.add(item);
            }
        } catch (FileNotFoundException e) {
            // Part of the declared contract; must not be absorbed by the
            // IOException handler below.
            throw e;
        } catch (IOException e) {
            // Best effort: keep what was read before the failure.
            System.err.println("Error reading " + file + ": " + e.getMessage());
        }
        return specs.toArray(new UnicodeSpec[specs.size()]);
    }

    void setCodePoint(int value) {
        codePoint = value;
    }

    /**
     * Return the code point in this Unicode specification
     * @return the char code point representing by the specification
     */
    public int getCodePoint() {
        return codePoint;
    }

    void setName(String name) {
        this.name = name;
    }

    public String getName() {
        return name;
    }

    void setGeneralCategory(byte category) {
        generalCategory = category;
    }

    public byte getGeneralCategory() {
        return generalCategory;
    }

    void setBidiCategory(byte category) {
        bidiCategory = category;
    }

    public byte getBidiCategory() {
        return bidiCategory;
    }

    void setCombiningClass(int combiningClass) {
        this.combiningClass = combiningClass;
    }

    public int getCombiningClass() {
        return combiningClass;
    }

    void setDecomposition(String decomposition) {
        this.decomposition = decomposition;
    }

    public String getDecomposition() {
        return decomposition;
    }

    void setDecimalValue(int value) {
        decimalValue = value;
    }

    public int getDecimalValue() {
        return decimalValue;
    }

    /** @return true if a decimal value is defined (the stored value is not -1) */
    public boolean isDecimalValue() {
        return decimalValue != -1;
    }

    void setDigitValue(int value) {
        digitValue = value;
    }

    public int getDigitValue() {
        return digitValue;
    }

    /** @return true if a digit value is defined (the stored value is not -1) */
    public boolean isDigitValue() {
        return digitValue != -1;
    }

    void setNumericValue(String value) {
        numericValue = value;
    }

    public String getNumericValue() {
        return numericValue;
    }

    /** @return true if the numeric value text is non-empty */
    public boolean isNumericValue() {
        return numericValue.length() > 0;
    }

    void setMirrored(boolean value) {
        mirrored = value;
    }

    public boolean isMirrored() {
        return mirrored;
    }

    void setOldName(String name) {
        oldName = name;
    }

    public String getOldName() {
        return oldName;
    }

    void setComment(String comment) {
        this.comment = comment;
    }

    public String getComment() {
        return comment;
    }

    void setUpperMap(int ch) {
        upperMap = ch;
    }

    public int getUpperMap() {
        return upperMap;
    }

    /** @return true if a simple uppercase mapping is defined */
    public boolean hasUpperMap() {
        return upperMap != MAP_UNDEFINED;
    }

    void setLowerMap(int ch) {
        lowerMap = ch;
    }

    public int getLowerMap() {
        return lowerMap;
    }

    /** @return true if a simple lowercase mapping is defined */
    public boolean hasLowerMap() {
        return lowerMap != MAP_UNDEFINED;
    }

    void setTitleMap(int ch) {
        titleMap = ch;
    }

    public int getTitleMap() {
        return titleMap;
    }

    /** @return true if a simple titlecase mapping is defined */
    public boolean hasTitleMap() {
        return titleMap != MAP_UNDEFINED;
    }

    int codePoint;         // the character's UTF-32 code value
    String name;           // the ASCII name
    byte generalCategory;  // general category, available via Character.getType()
    byte bidiCategory;     // available via Character.getDirectionality()
    int combiningClass;    // not used in Character
    String decomposition;  // not used in Character
    int decimalValue;      // decimal digit value
    int digitValue;        // not all digits are decimal
    String numericValue;   // numeric value if digit or non-digit
    boolean mirrored;      //
    String oldName;
    String comment;
    int upperMap;
    int lowerMap;
    int titleMap;

    // this is the number of fields in one line of the UnicodeData.txt file
    // each field is separated by a semicolon (a token)
    static final int REQUIRED_FIELDS = 15;

    /**
     * General category types
     * To preserve compatibility, these values cannot be changed
     */
    public static final byte
        UNASSIGNED                  =  0, // Cn normative
        UPPERCASE_LETTER            =  1, // Lu normative
        LOWERCASE_LETTER            =  2, // Ll normative
        TITLECASE_LETTER            =  3, // Lt normative
        MODIFIER_LETTER             =  4, // Lm normative
        OTHER_LETTER                =  5, // Lo normative
        NON_SPACING_MARK            =  6, // Mn informative
        ENCLOSING_MARK              =  7, // Me informative
        COMBINING_SPACING_MARK      =  8, // Mc normative
        DECIMAL_DIGIT_NUMBER        =  9, // Nd normative
        LETTER_NUMBER               = 10, // Nl normative
        OTHER_NUMBER                = 11, // No normative
        SPACE_SEPARATOR             = 12, // Zs normative
        LINE_SEPARATOR              = 13, // Zl normative
        PARAGRAPH_SEPARATOR         = 14, // Zp normative
        CONTROL                     = 15, // Cc normative
        FORMAT                      = 16, // Cf normative
        // 17 is unused for no apparent reason,
        // but must preserve forward compatibility
        PRIVATE_USE                 = 18, // Co normative
        SURROGATE                   = 19, // Cs normative
        DASH_PUNCTUATION            = 20, // Pd informative
        START_PUNCTUATION           = 21, // Ps informative
        END_PUNCTUATION             = 22, // Pe informative
        CONNECTOR_PUNCTUATION       = 23, // Pc informative
        OTHER_PUNCTUATION           = 24, // Po informative
        MATH_SYMBOL                 = 25, // Sm informative
        CURRENCY_SYMBOL             = 26, // Sc informative
        MODIFIER_SYMBOL             = 27, // Sk informative
        OTHER_SYMBOL                = 28, // So informative
        INITIAL_QUOTE_PUNCTUATION   = 29, // Pi informative
        FINAL_QUOTE_PUNCTUATION     = 30, // Pf informative

        // this value is only used in the character generation tool
        // it can change to accommodate the addition of new categories.
        GENERAL_CATEGORY_COUNT      = 31; // sentinel value

    static final byte SHORT = 0, LONG = 1;

    // general category type strings
    // NOTE: The order of this category array is dependent on the assignment of
    // category constants above. We want to access this array using constants above.
    // [][SHORT] is the SHORT name, [][LONG] is the LONG name
    static final String[][] generalCategoryList = {
        {"Cn", "UNASSIGNED"},
        {"Lu", "UPPERCASE_LETTER"},
        {"Ll", "LOWERCASE_LETTER"},
        {"Lt", "TITLECASE_LETTER"},
        {"Lm", "MODIFIER_LETTER"},
        {"Lo", "OTHER_LETTER"},
        {"Mn", "NON_SPACING_MARK"},
        {"Me", "ENCLOSING_MARK"},
        {"Mc", "COMBINING_SPACING_MARK"},
        {"Nd", "DECIMAL_DIGIT_NUMBER"},
        {"Nl", "LETTER_NUMBER"},
        {"No", "OTHER_NUMBER"},
        {"Zs", "SPACE_SEPARATOR"},
        {"Zl", "LINE_SEPARATOR"},
        {"Zp", "PARAGRAPH_SEPARATOR"},
        {"Cc", "CONTROL"},
        {"Cf", "FORMAT"},
        {"xx", "unused"},
        {"Co", "PRIVATE_USE"},
        {"Cs", "SURROGATE"},
        {"Pd", "DASH_PUNCTUATION"},
        {"Ps", "START_PUNCTUATION"},
        {"Pe", "END_PUNCTUATION"},
        {"Pc", "CONNECTOR_PUNCTUATION"},
        {"Po", "OTHER_PUNCTUATION"},
        {"Sm", "MATH_SYMBOL"},
        {"Sc", "CURRENCY_SYMBOL"},
        {"Sk", "MODIFIER_SYMBOL"},
        {"So", "OTHER_SYMBOL"},
        {"Pi", "INITIAL_QUOTE_PUNCTUATION"},
        {"Pf", "FINAL_QUOTE_PUNCTUATION"}
    };

    /**
     * Bidirectional categories
     */
    public static final byte
        DIRECTIONALITY_UNDEFINED                  = -1,

        // Strong category
        DIRECTIONALITY_LEFT_TO_RIGHT              =  0, // L
        DIRECTIONALITY_RIGHT_TO_LEFT              =  1, // R
        DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       =  2, // AL
        // Weak category
        DIRECTIONALITY_EUROPEAN_NUMBER            =  3, // EN
        DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  =  4, // ES
        DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR =  5, // ET
        DIRECTIONALITY_ARABIC_NUMBER              =  6, // AN
        DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    =  7, // CS
        DIRECTIONALITY_NONSPACING_MARK            =  8, // NSM
        DIRECTIONALITY_BOUNDARY_NEUTRAL           =  9, // BN
        // Neutral category
        DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10, // B
        DIRECTIONALITY_SEGMENT_SEPARATOR          = 11, // S
        DIRECTIONALITY_WHITESPACE                 = 12, // WS
        DIRECTIONALITY_OTHER_NEUTRALS             = 13, // ON
        // Explicit Formatting category
        DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14, // LRE
        DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15, // LRO
        DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16, // RLE
        DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17, // RLO
        DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18, // PDF
        DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE      = 19, // LRI
        DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE      = 20, // RLI
        DIRECTIONALITY_FIRST_STRONG_ISOLATE       = 21, // FSI
        DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE    = 22, // PDI

        DIRECTIONALITY_CATEGORY_COUNT             = 23; // sentinel value

    // If changes are made to the above bidi category assignments, this
    // list of bidi category names must be changed to keep their order in synch.
    // Access this list using the bidi category constants above.
    static final String[][] bidiCategoryList = {
        {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
        {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
        {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
        {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
        {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
        {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
        {"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
        {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
        {"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
        {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
        {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
        {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
        {"WS", "DIRECTIONALITY_WHITESPACE"},
        {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
        {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
        {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
        {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
        {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
        {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
        {"LRI", "DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE"},
        {"RLI", "DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE"},
        {"FSI", "DIRECTIONALITY_FIRST_STRONG_ISOLATE"},
        {"PDI", "DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE"},
    };

    // Unicode specification lines have fields in this order.
    static final byte
        FIELD_VALUE         = 0,
        FIELD_NAME          = 1,
        FIELD_CATEGORY      = 2,
        FIELD_CLASS         = 3,
        FIELD_BIDI          = 4,
        FIELD_DECOMPOSITION = 5,
        FIELD_DECIMAL       = 6,
        FIELD_DIGIT         = 7,
        FIELD_NUMERIC       = 8,
        FIELD_MIRRORED      = 9,
        FIELD_OLDNAME       = 10,
        FIELD_COMMENT       = 11,
        FIELD_UPPERCASE     = 12,
        FIELD_LOWERCASE     = 13,
        FIELD_TITLECASE     = 14;

    static final Pattern tokenSeparator = Pattern.compile(";");

    /**
     * Command-line driver: reads the data file named by the first argument,
     * keeps the plane given by the second argument, and prints each entry.
     *
     * @param args the data file path followed by the plane number
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: UnicodeSpec <unicode-data-file> <plane>");
            return;
        }
        try {
            File file = new File(args[0]);
            int plane = Integer.parseInt(args[1]);
            UnicodeSpec[] spec = UnicodeSpec.readSpecFile(file, plane);
            System.out.println("UnicodeSpec[" + spec.length + "]:");
            for (int x = 0; x < spec.length; x++) {
                System.out.println(spec[x].toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}