1 package build.tools.generatecharacter;
   2 
   3 import java.io.*;
   4 import java.nio.*;
   5 import java.util.*;
   6 import java.util.zip.*;
   7 
   8 public class CharacterName {
   9 
  10     public static void main(String[] args) {
  11         FileReader reader = null;
  12         try {
  13             if (args.length != 2) {
  14                 System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
  15                 System.exit(1);
  16             }
  17             reader = new FileReader(args[0]);
  18             BufferedReader bfr = new BufferedReader(reader);
  19             String line = null;
  20 
  21             StringBuilder namePool = new StringBuilder();
  22             byte[] cpPoolBytes = new byte[0x100000];
  23             boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8];
  24             int bkNum = 0; 
  25             ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
  26             int lastCp = 0;
  27             int cpNum = 0;
  28 
  29             while ((line = bfr.readLine()) != null) {
  30                 if (line.startsWith("#"))
  31                     continue;
  32                 UnicodeSpec spec = UnicodeSpec.parse(line);
  33                 if (spec != null) {
  34                     int cp = spec.getCodePoint();
  35                     String name = spec.getName();
  36                     if (name.equals("<control>") && spec.getOldName() != null) {
  37                         if (cp == 0x7)  // <control>BELL -> BEL; u+1f514 <-> BELL
  38                             name = "BEL";
  39                         else if (spec.getOldName().length() != 0)
  40                             name = spec.getOldName();
  41                         /*
  42                            3 "figment" characters from NameAliases.txt
  43                            Several documented labels for C1 control code points which
  44                            were never actually approved in any standard...but were
  45                            implemented in Perl regex.
  46                            0080;PADDING CHARACTER;figment
  47                            0081;HIGH OCTET PRESET;figment
  48                            0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
  49                         */
  50                         else if (cp == 0x80)
  51                             name = "PADDING CHARACTER";
  52                         else if (cp == 0x81)
  53                             name = "HIGH OCTET PRESET";
  54                         else if (cp == 0x99)
  55                             name = "SINGLE GRAPHIC CHARACTER INTRODUCER";
  56                         else
  57                             continue;
  58                     } else if (name.startsWith("<")) {
  59                         /*
  60                           3400    <CJK Ideograph Extension A, First>
  61                           4db5    <CJK Ideograph Extension A, Last>
  62                           4e00    <CJK Ideograph, First>
  63                           9fc3    <CJK Ideograph, Last>
  64                           ac00    <Hangul Syllable, First>
  65                           d7a3    <Hangul Syllable, Last>
  66                           d800    <Non Private Use High Surrogate, First>
  67                           db7f    <Non Private Use High Surrogate, Last>
  68                           db80    <Private Use High Surrogate, First>
  69                           dbff    <Private Use High Surrogate, Last>
  70                           dc00    <Low Surrogate, First>
  71                           dfff    <Low Surrogate, Last>
  72                           e000    <Private Use, First>
  73                           f8ff    <Private Use, Last>
  74                          20000    <CJK Ideograph Extension B, First>
  75                          2a6d6    <CJK Ideograph Extension B, Last>
  76                          f0000    <Plane 15 Private Use, First>
  77                          ffffd    <Plane 15 Private Use, Last>
  78                         */
  79                         continue;
  80                     }
  81                     cpNum++;
  82                     if (!cpBlocks[cp >> 8]) {
  83                         cpBlocks[cp >> 8] = true;
  84                         bkNum++;
  85                     }
  86                     if (cp == lastCp + 1) {
  87                         cpBB.put((byte)name.length());
  88                     } else {
  89                         cpBB.put((byte)0);  // segment start flag
  90                         cpBB.putInt((name.length() << 24) | (cp & 0xffffff));
  91                     }
  92                     namePool.append(name);
  93                     lastCp = cp;
  94                 }
  95             }
  96 
  97             byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
  98             int cpLen = cpBB.position();
  99             int total = cpLen + namePoolBytes.length;
 100             DataOutputStream dos = new DataOutputStream(
 101                                        new DeflaterOutputStream(
 102                                            new FileOutputStream(args[1])));
 103             dos.writeInt(total);  // total
 104             dos.writeInt(bkNum);  // bkNum;
 105             dos.writeInt(cpNum);  // cpNum
 106             dos.writeInt(cpLen);  // nameOff
 107             dos.write(cpPoolBytes, 0, cpLen);
 108             dos.write(namePoolBytes);
 109             dos.close();
 110 
 111         } catch (Throwable e) {
 112             System.out.println("Unexpected exception:");
 113             e.printStackTrace();
 114         } finally {
 115             if (reader != null) {
 116                 try {
 117                     reader.close();
 118                 } catch (Throwable ee) { ee.printStackTrace(); }
 119             }
 120         }
 121     }
 122 }