< prev index next >

make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java

Print this page
rev 56092 : imported patch 8229831
   1 /*
   2  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  56  * markers consisting of an alphabetic name string preceded by "$$".
  57  * Such markers are replaced with generated program text.  As a special
  58  * case, the marker "Lookup(xxx)" is recognized, where "xxx" consists of
  59  * alphabetic characters constituting a variable name.  The character "_"
  60  * is considered alphabetic for these purposes.
  61  *
  62  * @author  Guy Steele
  63  * @author  Alan Liu
  64  * @author  John O'Conner
  65  */
  66 
  67 public class GenerateCharacter {
  68 
  69     final static boolean DEBUG = false;
  70 
  71     final static String commandMarker = "$$";
  72     static String ROOT                        = "";
  73     static String DefaultUnicodeSpecFileName  = ROOT + "UnicodeData.txt";
  74     static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
  75     static String DefaultPropListFileName     = ROOT + "PropList.txt";

  76     static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
  77     static String DefaultJavaOutputFileName   = ROOT + "Character.java";
  78     static String DefaultCTemplateFileName    = ROOT + "Character.c.template";
  79     static String DefaultCOutputFileName      = ROOT + "Character.c";
  80 
  81     static int plane = 0;
  82 
  83     /* The overall idea is that, in the generated Character class source code,
  84     most character property data is stored in a special multi-level table whose
  85     structure is defined by a sequence of nonnegative integers [k1, k2, ..., kn].
  86     The integers must sum to 16 (the number of bits in a character).
  87     The first table is indexed by the k1 high-order bits of the character code.
  88     The result is concatenated to the next k2 bits of the character code to index
  89     the second table, and so on.  Eventually the kn low-order bits of the character
  90     code are concatenated and used to index one of two tables A and B; A contains
  91     32-bit integer entries and B contains 16-bit short entries.  The 48 bits that
  92     can be thus obtained encode the properties for the character.
  93 
  94     The default specification is [9, 4, 3, 0].  This particular table format was
  95     designed by conducting an exhaustive search of table formats to minimize the


 142            4 is Java whitespace
 143     2 bits      This field indicates whether the character has a numeric property.
 144         The four possible values for this field are as follows:
 145         0  This character has no numeric property.
 146         1  Adding the digit offset to the character code and then
 147            masking with 0x1F will produce the desired numeric value.
 148         2  This character has a "strange" numeric value.
 149         3  A Java supradecimal digit: adding the digit offset to the
 150            character code, then masking with 0x1F, then adding 10
 151            will produce the desired numeric value.
 152     5 bits  The digit offset (see description of previous field)
 153     5 bits      Character type (see below)
 154 
 155     B: the high 16 bits are defined as:
 156     1 bit Other_Lowercase property
 157     1 bit Other_Uppercase property
 158     1 bit Other_Alphabetic property
 159     1 bit Other_Math property
 160     1 bit Ideographic property
 161     1 bit Noncharacter codepoint property


 162     */
 163 
 164 
 165     // bit masks identify each component of a 32-bit property field described
 166     // above.
 167     // shift* indicates how many shifts right must happen to get the
 168     // indicated property value in the lowest bits of the 32-bit space.
 169     private static final int
 170         shiftType           = 0,        maskType            =       0x001F,
 171         shiftDigitOffset    = 5,        maskDigitOffset     =       0x03E0,
 172         shiftNumericType    = 10,       maskNumericType     =       0x0C00,
 173         shiftIdentifierInfo = 12,       maskIdentifierInfo  =       0x7000,
 174                                         maskUnicodePart     =       0x1000,
 175         shiftCaseInfo       = 15,       maskCaseInfo        =      0x38000,
 176                                         maskLowerCase       =      0x20000,
 177                                         maskUpperCase       =      0x10000,
 178                                         maskTitleCase       =      0x08000,
 179         shiftCaseOffset     = 18,       maskCaseOffset      =   0x07FC0000,
 180         shiftCaseOffsetSign = 5,
 181                                         // used only when calculating and
 182                                         // storing digit offsets from char values
 183                                         maskDigit               =   0x001F,
 184                                         // case offset are 9 bits
 185                                         maskCase                =   0x01FF,
 186         shiftBidi           = 27,       maskBidi              = 0x78000000,
 187         shiftMirrored       = 31,       //maskMirrored          = 0x80000000,
 188         shiftPlane          = 16,       maskPlane = 0xFF0000;
 189 
 190     // maskMirrored needs to be a long when the upper 16 bits are in use
 191     private static final long maskMirrored          = 0x80000000L;
 192 
 193     // bit masks identify the 16-bit property field described above, in B
 194     // table
 195     private static final long
 196         maskOtherLowercase  = 0x100000000L,
 197         maskOtherUppercase  = 0x200000000L,
 198         maskOtherAlphabetic = 0x400000000L,
 199         maskOtherMath       = 0x800000000L,
 200         maskIdeographic     = 0x1000000000L,
 201         maskNoncharacterCP  = 0x2000000000L;


 202 
 203     // Can compare masked values with these to determine
 204     // numeric or lexical types.
 205     public static int
 206         valueNotNumeric             = 0x0000,
 207         valueDigit                  = 0x0400,
 208         valueStrangeNumeric         = 0x0800,
 209         valueJavaSupradecimal       = 0x0C00,
 210         valueIgnorable              = 0x1000,
 211         valueJavaOnlyPart           = 0x2000,
 212         valueJavaUnicodePart        = 0x3000,
 213         valueJavaWhitespace         = 0x4000,
 214         valueJavaStartUnicodePart   = 0x5000,
 215         valueJavaOnlyStart          = 0x6000,
 216         valueJavaUnicodeStart       = 0x7000,
 217         lowJavaStart                = 0x5000,
 218         nonzeroJavaPart             = 0x3000,
 219         valueUnicodeStart           = 0x7000;
 220 
 221     // these values are used when only identifier properties are generated


 350                 System.out.println("An error has occured during spec mapping.");
 351                 System.exit(0);
 352             }
 353         }
 354         // if there are still unprocessed chars, process them
 355         // as unassigned/undefined.
 356         codePoint = (plane<<16) | k;
 357         while (k < result.length) {
 358             result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
 359             ++k;
 360             ++codePoint;
 361         }
 362         // now add all extra supported properties from PropList to the
 363         // upper 16 bits
 364         addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
 365         addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
 366         addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
 367         addExProp(result, propList, "Ideographic", maskIdeographic);
 368         //addExProp(result, propList, "Other_Math", maskOtherMath);
 369         //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);


 370 
 371         return result;
 372     }
 373 
 374     // The maximum and minimum offsets found while scanning the database
 375     static int maxOffsetSeen = 0;
 376     static int minOffsetSeen = 0;
 377 
 378     /**
 379      * Some Unicode separator characters are not considered Java whitespace.
 380      * @param c character to test
 381      * @return true if c is an invalid Java whitespace character, false otherwise.
 382      */
 383     static boolean isInvalidJavaWhiteSpace(int c) {
 384         int[] exceptions = {0x00A0, 0x2007, 0x202F, 0xFEFF};
 385         boolean retValue = false;
 386         for(int x=0;x<exceptions.length;x++) {
 387             if(c == exceptions[x]) {
 388                 retValue = true;
 389                 break;


 763             return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
 764         if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
 765                 x.substring(x.length()-1).equals(")") )
 766             return genAccess("B", x.substring(9, x.length()-1), 16);
 767         if (x.equals("shiftType")) return Long.toString(shiftType);
 768         if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
 769         if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
 770         if (x.equals("maskUnicodePart")) return "0x" + hex8(maskUnicodePart);
 771         if (x.equals("shiftCaseOffset")) return Long.toString(shiftCaseOffset);
 772         if (x.equals("shiftCaseInfo")) return Long.toString(shiftCaseInfo);
 773         if (x.equals("shiftCaseOffsetSign")) return Long.toString(shiftCaseOffsetSign);
 774         if (x.equals("maskCase")) return "0x" + hex8(maskCase);
 775         if (x.equals("maskCaseOffset")) return "0x" + hex8(maskCaseOffset);
 776         if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
 777         if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
 778         if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
 779         if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
 780         if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
 781         if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
 782         if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);


 783         if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
 784         if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
 785         if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
 786         if (x.equals("valueJavaUnicodePart")) return "0x" + hex8(valueJavaUnicodePart);
 787         if (x.equals("valueJavaOnlyPart")) return "0x" + hex8(valueJavaOnlyPart);
 788         if (x.equals("valueJavaWhitespace")) return "0x" + hex8(valueJavaWhitespace);
 789         if (x.equals("lowJavaStart")) return "0x" + hex8(lowJavaStart);
 790         if (x.equals("nonzeroJavaPart")) return "0x" + hex8(nonzeroJavaPart);
 791         if (x.equals("bitJavaStart")) return "0x" + hex8(bitJavaStart);
 792         if (x.equals("bitJavaPart")) return Long.toString(bitJavaPart);
 793         if (x.equals("valueUnicodeStart")) return "0x" + hex8(valueUnicodeStart);
 794         if (x.equals("maskIsJavaIdentifierStart")) return "0x" + hex(maskIsJavaIdentifierStart);
 795         if (x.equals("maskIsJavaIdentifierPart")) return "0x" + hex(maskIsJavaIdentifierPart);
 796         if (x.equals("shiftDigitOffset")) return Long.toString(shiftDigitOffset);
 797         if (x.equals("maskDigitOffset")) return "0x" + hex(maskDigitOffset);
 798         if (x.equals("maskDigit")) return "0x" + hex(maskDigit);
 799         if (x.equals("shiftNumericType")) return Long.toString(shiftNumericType);
 800         if (x.equals("maskNumericType")) return "0x" + hex(maskNumericType);
 801         if (x.equals("valueNotNumeric")) return "0x" + hex8(valueNotNumeric);
 802         if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);


1595                 (bits == 4 ? "0xF" : "" + ((1 << bits) - 1)) + ")";
1596             access = extracted;
1597         }
1598         return access;
1599     }
1600 
1601     /* The command line arguments are decoded and used to set the following
1602      global variables.
1603      */
1604 
1605     static boolean verbose = false;
1606     static boolean nobidi = false;
1607     static boolean nomirror = false;
1608     static boolean identifiers = false;
1609     static boolean Csyntax = false;
1610     static String TemplateFileName = null;
1611     static String OutputFileName = null;
1612     static String UnicodeSpecFileName = null; // liu
1613     static String SpecialCasingFileName = null;
1614     static String PropListFileName = null;

1615     static boolean useCharForByte = false;
1616     static int[] sizes;
1617     static int bins = 0; // liu; if > 0, then perform search
1618     static boolean tableAsString = false;
1619     static boolean bLatin1 = false;
1620 
1621     static String commandLineDescription;
1622 
1623     /* Other global variables, equal in length to the "sizes" array. */
1624 
1625     static int[] shifts;
1626     static int[] zeroextend;
1627     static int[] bytes;
1628     static boolean[] preshifted;
1629     static long[][] tables;
1630 
1631 
1632     /* Other global variables */
1633     static String commentStart;
1634     static String commentEnd;


1722                 else {
1723                     UnicodeSpecFileName = args[++j];
1724                 }
1725             }
1726             else if (args[j].equals("-specialcasing")) {
1727                 if (j == args.length -1) {
1728                     FAIL("File name missing after -specialcasing");
1729                 }
1730                 else {
1731                     SpecialCasingFileName = args[++j];
1732                 }
1733             }
1734             else if (args[j].equals("-proplist")) {
1735                 if (j == args.length -1) {
1736                     FAIL("File name missing after -proplist");
1737                 }
1738                 else {
1739                     PropListFileName = args[++j];
1740                 }
1741             }








1742             else if (args[j].equals("-plane")) {
1743                 if (j == args.length -1) {
1744                     FAIL("Plane number missing after -plane");
1745                 }
1746                 else {
1747                     plane = Integer.parseInt(args[++j]);
1748                 }
1749                 if (plane > 0) {
1750                     bLatin1 = false;
1751                 }
1752             }
1753             else if ("-usecharforbyte".equals(args[j])) {
1754                 useCharForByte = true;
1755             }
1756             else if (args[j].equals("-latin1")) {
1757                 bLatin1 = true;
1758                 plane = 0;
1759             }
1760             else {
1761                 try {


1786                 sizes = newsizes;
1787             }
1788             else {
1789                 int[] newsizes = { 10, 5, 1 }; // Guy's old defaults for 2.0.14: { 9, 4, 3, 0 }
1790                 desc.append("10 5 1]");
1791                 sizes = newsizes;
1792             }
1793         }
1794         if (UnicodeSpecFileName == null) { // liu
1795             UnicodeSpecFileName = DefaultUnicodeSpecFileName;
1796             desc.append(" [-spec " + UnicodeSpecFileName + ']');
1797         }
1798         if (SpecialCasingFileName == null) {
1799             SpecialCasingFileName = DefaultSpecialCasingFileName;
1800             desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
1801         }
1802         if (PropListFileName == null) {
1803             PropListFileName = DefaultPropListFileName;
1804             desc.append(" [-proplist " + PropListFileName + ']');
1805         }




1806         if (TemplateFileName == null) {
1807             TemplateFileName = (Csyntax ? DefaultCTemplateFileName
1808                   : DefaultJavaTemplateFileName);
1809             desc.append(" [-template " + TemplateFileName + ']');
1810         }
1811         if (OutputFileName == null) {
1812             OutputFileName = (Csyntax ? DefaultCOutputFileName
1813                     : DefaultJavaOutputFileName);
1814             desc.append(" [-o " + OutputFileName + ']');
1815         }
1816         commentStart = (Csyntax ? "/*" : "//");
1817         commentEnd = (Csyntax ? " */" : "");
1818         commandLineDescription = desc.toString();
1819     }
1820 
1821     private static void searchBins(long[] map, int binsOccupied) throws Exception {
1822         int bitsFree = 16;
1823         for (int i=0; i<binsOccupied; ++i) bitsFree -= sizes[i];
1824         if (binsOccupied == (bins-1)) {
1825             sizes[binsOccupied] = bitsFree;


1937     * <li> Generate the source code for the class Character by performing
1938     *           macro processing on a template file.
1939     * </ol>
1940     *
1941     * @param args       the command line arguments, as an array of String
1942     *
1943     * @see GenerateCharacter#processArgs
1944     * @see UnicodeSpec#readSpecFile
1945     * @see GenerateCharacter#buildMap
1946     * @see GenerateCharacter#buildTable
1947     * @see GenerateCharacter#generateCharacterClass
1948     */
1949 
1950     public static void main(String[] args) {
1951         processArgs(args);
1952         try {
1953 
1954             UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
1955             specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
1956             PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);

1957 
1958             if (verbose) {
1959                 System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
1960             }
1961             long[] map = buildMap(data, specialCaseMaps, propList);
1962             if (verbose) {
1963                 System.err.println("Completed building of initial map");
1964             }
1965 
1966             if (bins == 0) {
1967                 generateForSizes(map);
1968             }
1969             else {
1970                 while (bins > 0) {
1971                     sizes = new int[bins];
1972                     searchBins(map, 0);
1973                     --bins;
1974                 }
1975             }
1976             if (verbose && false) {
   1 /*
   2  * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  56  * markers consisting of an alphabetic name string preceded by "$$".
  57  * Such markers are replaced with generated program text.  As a special
  58  * case, the marker "Lookup(xxx)" is recognized, where "xxx" consists of
  59  * alphabetic characters constituting a variable name.  The character "_"
  60  * is considered alphabetic for these purposes.
  61  *
  62  * @author  Guy Steele
  63  * @author  Alan Liu
  64  * @author  John O'Conner
  65  */
  66 
  67 public class GenerateCharacter {
  68 
  69     final static boolean DEBUG = false;
  70 
  71     final static String commandMarker = "$$";
  72     static String ROOT                        = "";
  73     static String DefaultUnicodeSpecFileName  = ROOT + "UnicodeData.txt";
  74     static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
  75     static String DefaultPropListFileName     = ROOT + "PropList.txt";
  76     static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt";
  77     static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
  78     static String DefaultJavaOutputFileName   = ROOT + "Character.java";
  79     static String DefaultCTemplateFileName    = ROOT + "Character.c.template";
  80     static String DefaultCOutputFileName      = ROOT + "Character.c";
  81 
  82     static int plane = 0;
  83 
  84     /* The overall idea is that, in the generated Character class source code,
  85     most character property data is stored in a special multi-level table whose
  86     structure is defined by a sequence of nonnegative integers [k1, k2, ..., kn].
  87     The integers must sum to 16 (the number of bits in a character).
  88     The first table is indexed by the k1 high-order bits of the character code.
  89     The result is concatenated to the next k2 bits of the character code to index
  90     the second table, and so on.  Eventually the kn low-order bits of the character
  91     code are concatenated and used to index one of two tables A and B; A contains
  92     32-bit integer entries and B contains 16-bit short entries.  The 48 bits that
  93     can be thus obtained encode the properties for the character.
  94 
  95     The default specification is [9, 4, 3, 0].  This particular table format was
  96     designed by conducting an exhaustive search of table formats to minimize the


 143            4 is Java whitespace
 144     2 bits      This field indicates whether the character has a numeric property.
 145         The four possible values for this field are as follows:
 146         0  This character has no numeric property.
 147         1  Adding the digit offset to the character code and then
 148            masking with 0x1F will produce the desired numeric value.
 149         2  This character has a "strange" numeric value.
 150         3  A Java supradecimal digit: adding the digit offset to the
 151            character code, then masking with 0x1F, then adding 10
 152            will produce the desired numeric value.
 153     5 bits  The digit offset (see description of previous field)
 154     5 bits      Character type (see below)
 155 
 156     B: the high 16 bits are defined as:
 157     1 bit Other_Lowercase property
 158     1 bit Other_Uppercase property
 159     1 bit Other_Alphabetic property
 160     1 bit Other_Math property
 161     1 bit Ideographic property
 162     1 bit Noncharacter codepoint property
 163     1 bit ID_Start property
 164     1 bit ID_Continue property
 165     */
 166 
 167 
 168     // bit masks identify each component of a 32-bit property field described
 169     // above.
 170     // shift* indicates how many shifts right must happen to get the
 171     // indicated property value in the lowest bits of the 32-bit space.
 172     private static final int
 173         shiftType           = 0,        maskType            =       0x001F,
 174         shiftDigitOffset    = 5,        maskDigitOffset     =       0x03E0,
 175         shiftNumericType    = 10,       maskNumericType     =       0x0C00,
 176         shiftIdentifierInfo = 12,       maskIdentifierInfo  =       0x7000,
 177                                         maskUnicodePart     =       0x1000,
 178         shiftCaseInfo       = 15,       maskCaseInfo        =      0x38000,
 179                                         maskLowerCase       =      0x20000,
 180                                         maskUpperCase       =      0x10000,
 181                                         maskTitleCase       =      0x08000,
 182         shiftCaseOffset     = 18,       maskCaseOffset      =   0x07FC0000,
 183         shiftCaseOffsetSign = 5,
 184                                         // used only when calculating and
 185                                         // storing digit offsets from char values
 186                                         maskDigit               =   0x001F,
 187                                         // case offset are 9 bits
 188                                         maskCase                =   0x01FF,
 189         shiftBidi           = 27,       maskBidi              = 0x78000000,
 190         shiftMirrored       = 31,       //maskMirrored          = 0x80000000,
 191         shiftPlane          = 16,       maskPlane = 0xFF0000;
 192 
 193     // maskMirrored needs to be a long when the upper 16 bits are in use
 194     private static final long maskMirrored          = 0x80000000L;
 195 
 196     // bit masks identify the 16-bit property field described above, in B
 197     // table
 198     private static final long
 199         maskOtherLowercase  = 0x100000000L,
 200         maskOtherUppercase  = 0x200000000L,
 201         maskOtherAlphabetic = 0x400000000L,
 202         maskOtherMath       = 0x800000000L,
 203         maskIdeographic     = 0x1000000000L,
 204         maskNoncharacterCP  = 0x2000000000L,
 205         maskIDStart         = 0x4000000000L,
 206         maskIDContinue      = 0x8000000000L;
 207 
 208     // Can compare masked values with these to determine
 209     // numeric or lexical types.
 210     public static int
 211         valueNotNumeric             = 0x0000,
 212         valueDigit                  = 0x0400,
 213         valueStrangeNumeric         = 0x0800,
 214         valueJavaSupradecimal       = 0x0C00,
 215         valueIgnorable              = 0x1000,
 216         valueJavaOnlyPart           = 0x2000,
 217         valueJavaUnicodePart        = 0x3000,
 218         valueJavaWhitespace         = 0x4000,
 219         valueJavaStartUnicodePart   = 0x5000,
 220         valueJavaOnlyStart          = 0x6000,
 221         valueJavaUnicodeStart       = 0x7000,
 222         lowJavaStart                = 0x5000,
 223         nonzeroJavaPart             = 0x3000,
 224         valueUnicodeStart           = 0x7000;
 225 
 226     // these values are used when only identifier properties are generated


 355                 System.out.println("An error has occured during spec mapping.");
 356                 System.exit(0);
 357             }
 358         }
 359         // if there are still unprocessed chars, process them
 360         // as unassigned/undefined.
 361         codePoint = (plane<<16) | k;
 362         while (k < result.length) {
 363             result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
 364             ++k;
 365             ++codePoint;
 366         }
 367         // now add all extra supported properties from PropList to the
 368         // upper 16 bits
 369         addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
 370         addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
 371         addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
 372         addExProp(result, propList, "Ideographic", maskIdeographic);
 373         //addExProp(result, propList, "Other_Math", maskOtherMath);
 374         //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
 375         addExProp(result, propList, "ID_Start", maskIDStart);
 376         addExProp(result, propList, "ID_Continue", maskIDContinue);
 377 
 378         return result;
 379     }
 380 
 381     // The maximum and minimum offsets found while scanning the database
 382     static int maxOffsetSeen = 0;
 383     static int minOffsetSeen = 0;
 384 
 385     /**
 386      * Some Unicode separator characters are not considered Java whitespace.
 387      * @param c character to test
 388      * @return true if c is an invalid Java whitespace character, false otherwise.
 389      */
 390     static boolean isInvalidJavaWhiteSpace(int c) {
 391         int[] exceptions = {0x00A0, 0x2007, 0x202F, 0xFEFF};
 392         boolean retValue = false;
 393         for(int x=0;x<exceptions.length;x++) {
 394             if(c == exceptions[x]) {
 395                 retValue = true;
 396                 break;


 770             return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
 771         if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
 772                 x.substring(x.length()-1).equals(")") )
 773             return genAccess("B", x.substring(9, x.length()-1), 16);
 774         if (x.equals("shiftType")) return Long.toString(shiftType);
 775         if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
 776         if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
 777         if (x.equals("maskUnicodePart")) return "0x" + hex8(maskUnicodePart);
 778         if (x.equals("shiftCaseOffset")) return Long.toString(shiftCaseOffset);
 779         if (x.equals("shiftCaseInfo")) return Long.toString(shiftCaseInfo);
 780         if (x.equals("shiftCaseOffsetSign")) return Long.toString(shiftCaseOffsetSign);
 781         if (x.equals("maskCase")) return "0x" + hex8(maskCase);
 782         if (x.equals("maskCaseOffset")) return "0x" + hex8(maskCaseOffset);
 783         if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
 784         if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
 785         if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
 786         if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
 787         if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
 788         if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
 789         if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
 790         if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32);
 791         if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32);
 792         if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
 793         if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
 794         if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
 795         if (x.equals("valueJavaUnicodePart")) return "0x" + hex8(valueJavaUnicodePart);
 796         if (x.equals("valueJavaOnlyPart")) return "0x" + hex8(valueJavaOnlyPart);
 797         if (x.equals("valueJavaWhitespace")) return "0x" + hex8(valueJavaWhitespace);
 798         if (x.equals("lowJavaStart")) return "0x" + hex8(lowJavaStart);
 799         if (x.equals("nonzeroJavaPart")) return "0x" + hex8(nonzeroJavaPart);
 800         if (x.equals("bitJavaStart")) return "0x" + hex8(bitJavaStart);
 801         if (x.equals("bitJavaPart")) return Long.toString(bitJavaPart);
 802         if (x.equals("valueUnicodeStart")) return "0x" + hex8(valueUnicodeStart);
 803         if (x.equals("maskIsJavaIdentifierStart")) return "0x" + hex(maskIsJavaIdentifierStart);
 804         if (x.equals("maskIsJavaIdentifierPart")) return "0x" + hex(maskIsJavaIdentifierPart);
 805         if (x.equals("shiftDigitOffset")) return Long.toString(shiftDigitOffset);
 806         if (x.equals("maskDigitOffset")) return "0x" + hex(maskDigitOffset);
 807         if (x.equals("maskDigit")) return "0x" + hex(maskDigit);
 808         if (x.equals("shiftNumericType")) return Long.toString(shiftNumericType);
 809         if (x.equals("maskNumericType")) return "0x" + hex(maskNumericType);
 810         if (x.equals("valueNotNumeric")) return "0x" + hex8(valueNotNumeric);
 811         if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);


1604                 (bits == 4 ? "0xF" : "" + ((1 << bits) - 1)) + ")";
1605             access = extracted;
1606         }
1607         return access;
1608     }
1609 
1610     /* The command line arguments are decoded and used to set the following
1611      global variables.
1612      */
1613 
1614     static boolean verbose = false; // when true, main prints progress messages
1615     static boolean nobidi = false;
1616     static boolean nomirror = false;
1617     static boolean identifiers = false; // selects 2 rather than 32 as the last genAccess argument for table "A"
1618     static boolean Csyntax = false; // picks the C (vs. Java) default template/output names and comment delimiters
1619     static String TemplateFileName = null; // replaced by a Csyntax-dependent default when left null
1620     static String OutputFileName = null; // replaced by a Csyntax-dependent default when left null
1621     static String UnicodeSpecFileName = null; // liu; falls back to DefaultUnicodeSpecFileName when left null
1622     static String SpecialCasingFileName = null; // set by -specialcasing; falls back to DefaultSpecialCasingFileName
1623     static String PropListFileName = null; // set by -proplist; falls back to DefaultPropListFileName
1624     static String DerivedPropsFileName = null; // set by -derivedprops; falls back to DefaultDerivedPropsFileName
1625     static boolean useCharForByte = false; // set by -usecharforbyte
1626     static int[] sizes; // one entry per bin; searchBins subtracts these from a budget of 16
1627     static int bins = 0; // liu; if > 0, then perform search
1628     static boolean tableAsString = false;
1629     static boolean bLatin1 = false; // set by -latin1 (which also sets plane to 0); cleared by -plane with a value > 0
1630 
1631     static String commandLineDescription; // accumulated option description; defaulted options appear in [brackets]
1632 
1633     /* Other global variables, equal in length to the "sizes" array. */
1634 
1635     static int[] shifts;
1636     static int[] zeroextend;
1637     static int[] bytes;
1638     static boolean[] preshifted;
1639     static long[][] tables;
1640 
1641 
1642     /* Other global variables */
1643     static String commentStart; // "/*" when Csyntax is set, "//" otherwise
1644     static String commentEnd; // " */" when Csyntax is set, empty otherwise


1732                 else {
1733                     UnicodeSpecFileName = args[++j];
1734                 }
1735             }
1736             else if (args[j].equals("-specialcasing")) {
1737                 if (j == args.length -1) {
1738                     FAIL("File name missing after -specialcasing");
1739                 }
1740                 else {
1741                     SpecialCasingFileName = args[++j];
1742                 }
1743             }
1744             else if (args[j].equals("-proplist")) {
1745                 if (j == args.length -1) {
1746                     FAIL("File name missing after -proplist");
1747                 }
1748                 else {
1749                     PropListFileName = args[++j];
1750                 }
1751             }
1752             else if (args[j].equals("-derivedprops")) {
1753                 if (j == args.length -1) {
1754                     FAIL("File name missing after -derivedprops");
1755                 }
1756                 else {
1757                     DerivedPropsFileName = args[++j];
1758                 }
1759             }
1760             else if (args[j].equals("-plane")) {
1761                 if (j == args.length -1) {
1762                     FAIL("Plane number missing after -plane");
1763                 }
1764                 else {
1765                     plane = Integer.parseInt(args[++j]);
1766                 }
1767                 if (plane > 0) {
1768                     bLatin1 = false;
1769                 }
1770             }
1771             else if ("-usecharforbyte".equals(args[j])) {
1772                 useCharForByte = true;
1773             }
1774             else if (args[j].equals("-latin1")) {
1775                 bLatin1 = true;
1776                 plane = 0;
1777             }
1778             else {
1779                 try {


1804                 sizes = newsizes;
1805             }
1806             else {
1807                 int[] newsizes = { 10, 5, 1 }; // Guy's old defaults for 2.0.14: { 9, 4, 3, 0 }
1808                 desc.append("10 5 1]");
1809                 sizes = newsizes;
1810             }
1811         }
1812         if (UnicodeSpecFileName == null) { // liu
1813             UnicodeSpecFileName = DefaultUnicodeSpecFileName;
1814             desc.append(" [-spec " + UnicodeSpecFileName + ']');
1815         }
1816         if (SpecialCasingFileName == null) {
1817             SpecialCasingFileName = DefaultSpecialCasingFileName;
1818             desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
1819         }
1820         if (PropListFileName == null) {
1821             PropListFileName = DefaultPropListFileName;
1822             desc.append(" [-proplist " + PropListFileName + ']');
1823         }
1824         if (DerivedPropsFileName == null) {
1825             DerivedPropsFileName = DefaultDerivedPropsFileName;
1826             desc.append(" [-derivedprops " + DerivedPropsFileName + ']');
1827         }
1828         if (TemplateFileName == null) {
1829             TemplateFileName = (Csyntax ? DefaultCTemplateFileName
1830                   : DefaultJavaTemplateFileName);
1831             desc.append(" [-template " + TemplateFileName + ']');
1832         }
1833         if (OutputFileName == null) {
1834             OutputFileName = (Csyntax ? DefaultCOutputFileName
1835                     : DefaultJavaOutputFileName);
1836             desc.append(" [-o " + OutputFileName + ']');
1837         }
1838         commentStart = (Csyntax ? "/*" : "//");
1839         commentEnd = (Csyntax ? " */" : "");
1840         commandLineDescription = desc.toString();
1841     }
1842 
1843     private static void searchBins(long[] map, int binsOccupied) throws Exception {
1844         int bitsFree = 16;
1845         for (int i=0; i<binsOccupied; ++i) bitsFree -= sizes[i];
1846         if (binsOccupied == (bins-1)) {
1847             sizes[binsOccupied] = bitsFree;


1959     * <li> Generate the source code for the class Character by performing
1960     *           macro processing on a template file.
1961     * </ol>
1962     *
1963     * @param args       the command line arguments, as an array of String
1964     *
1965     * @see GenerateCharacter#processArgs
1966     * @see UnicodeSpec#readSpecFile
1967     * @see GenerateCharacter#buildMap
1968     * @see GenerateCharacter#buildTable
1969     * @see GenerateCharacter#generateCharacterClass
1970     */
1971 
1972     public static void main(String[] args) {
1973         processArgs(args);
1974         try {
1975 
1976             UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
1977             specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
1978             PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
1979             propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane));
1980 
1981             if (verbose) {
1982                 System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
1983             }
1984             long[] map = buildMap(data, specialCaseMaps, propList);
1985             if (verbose) {
1986                 System.err.println("Completed building of initial map");
1987             }
1988 
1989             if (bins == 0) {
1990                 generateForSizes(map);
1991             }
1992             else {
1993                 while (bins > 0) {
1994                     sizes = new int[bins];
1995                     searchBins(map, 0);
1996                     --bins;
1997                 }
1998             }
1999             if (verbose && false) {
< prev index next >