1 package build.tools.generatecharacter; 2 3 import java.io.*; 4 import java.nio.*; 5 import java.util.*; 6 import java.util.zip.*; 7 8 public class CharacterName { 9 10 public static void main(String[] args) { 11 FileReader reader = null; 12 try { 13 if (args.length != 2) { 14 System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat"); 15 System.exit(1); 16 } 17 reader = new FileReader(args[0]); 18 BufferedReader bfr = new BufferedReader(reader); 19 String line = null; 20 21 StringBuilder namePool = new StringBuilder(); 22 byte[] cpPoolBytes = new byte[0x100000]; 23 boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8]; 24 int bkNum = 0; 25 ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes); 26 int lastCp = 0; 27 int cpNum = 0; 28 29 while ((line = bfr.readLine()) != null) { 30 if (line.startsWith("#")) 31 continue; 32 UnicodeSpec spec = UnicodeSpec.parse(line); 33 if (spec != null) { 34 int cp = spec.getCodePoint(); 35 String name = spec.getName(); 36 if (name.equals("<control>") && spec.getOldName() != null) { 37 if (cp == 0x7) // <control>BELL -> BEL; u+1f514 <-> BELL 38 name = "BEL"; 39 else if (spec.getOldName().length() != 0) 40 name = spec.getOldName(); 41 /* 42 3 "figment" characters from NameAliases.txt 43 Several documented labels for C1 control code points which 44 were never actually approved in any standard...but were 45 implemented in Perl regex. 46 0080;PADDING CHARACTER;figment 47 0081;HIGH OCTET PRESET;figment 48 0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment 49 */ 50 else if (cp == 0x80) 51 name = "PADDING CHARACTER"; 52 else if (cp == 0x81) 53 name = "HIGH OCTET PRESET"; 54 else if (cp == 0x99) 55 name = "SINGLE GRAPHIC CHARACTER INTRODUCER"; 56 else 57 continue; 58 } else if (name.startsWith("<")) { 59 /* 60 3400 <CJK Ideograph Extension A, First> 61 4db5 <CJK Ideograph Extension A, Last> 62 4e00 <CJK Ideograph, First> 63 9fc3 <CJK Ideograph, Last> 64 ac00 <Hangul Syllable, First> 65 d7a3 <Hangul Syllable, Last> 66 d800 <Non Private Use High Surrogate, First> 67 db7f <Non Private Use High Surrogate, Last> 68 db80 <Private Use High Surrogate, First> 69 dbff <Private Use High Surrogate, Last> 70 dc00 <Low Surrogate, First> 71 dfff <Low Surrogate, Last> 72 e000 <Private Use, First> 73 f8ff <Private Use, Last> 74 20000 <CJK Ideograph Extension B, First> 75 2a6d6 <CJK Ideograph Extension B, Last> 76 f0000 <Plane 15 Private Use, First> 77 ffffd <Plane 15 Private Use, Last> 78 */ 79 continue; 80 } 81 cpNum++; 82 if (!cpBlocks[cp >> 8]) { 83 cpBlocks[cp >> 8] = true; 84 bkNum++; 85 } 86 if (cp == lastCp + 1) { 87 cpBB.put((byte)name.length()); 88 } else { 89 cpBB.put((byte)0); // segment start flag 90 cpBB.putInt((name.length() << 24) | (cp & 0xffffff)); 91 } 92 namePool.append(name); 93 lastCp = cp; 94 } 95 } 96 97 byte[] namePoolBytes = namePool.toString().getBytes("ASCII"); 98 int cpLen = cpBB.position(); 99 int total = cpLen + namePoolBytes.length; 100 DataOutputStream dos = new DataOutputStream( 101 new DeflaterOutputStream( 102 new FileOutputStream(args[1]))); 103 dos.writeInt(total); // total 104 dos.writeInt(bkNum); // bkNum; 105 dos.writeInt(cpNum); // cpNum 106 dos.writeInt(cpLen); // nameOff 107 dos.write(cpPoolBytes, 0, cpLen); 108 dos.write(namePoolBytes); 109 dos.close(); 110 111 } catch (Throwable e) { 112 System.out.println("Unexpected exception:"); 113 e.printStackTrace(); 114 } finally { 115 if (reader != null) { 116 try { 117 reader.close(); 118 } catch (Throwable ee) { ee.printStackTrace(); } 119 } 120 } 121 } 122 }