/*
 * Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.generatecharacter;

import java.io.*;
import java.nio.*;
import java.util.*;
import java.util.zip.*;

/**
 * Command-line tool that reads a {@code UnicodeData.txt} style file and
 * writes a deflate-compressed character name table.
 *
 * <p>Layout of the (uncompressed) output, as written by {@link #main}:
 * <pre>
 *   int  total     size of the code point table plus the name pool
 *   int  bkNum     number of distinct 256-code-point blocks that have a name
 *   int  cpNum     number of named code points
 *   int  nameOff   length of the code point table (offset of the name pool)
 *   byte[nameOff]  code point table
 *   byte[]         name pool, all names concatenated, ASCII encoded
 * </pre>
 *
 * <p>Code point table entries come in two forms:
 * <ul>
 *   <li>code point directly follows the previous one: a single byte holding
 *       the name length;</li>
 *   <li>otherwise: a {@code 0} byte (segment start flag) followed by an int
 *       whose top 8 bits are the name length and whose low 24 bits are the
 *       code point.</li>
 * </ul>
 */
public class CharacterName {

    /** Largest name length that fits in the single length byte of an entry. */
    private static final int MAX_NAME_LENGTH = 0xff;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the input data file,
     *             {@code args[1]} is the output file to create.
     *             Any other argument count prints the usage and exits with
     *             status 1. Any failure while reading or writing is reported
     *             and also terminates the process with status 1.
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
            System.exit(1);
        }
        try {
            StringBuilder namePool = new StringBuilder();
            byte[] cpPoolBytes = new byte[0x100000];
            boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8];
            int bkNum = 0;
            ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
            int lastCp = 0;
            int cpNum = 0;

            // The reader is closed automatically, including on a parse failure.
            try (BufferedReader bfr = new BufferedReader(new FileReader(args[0]))) {
                String line;
                while ((line = bfr.readLine()) != null) {
                    if (line.startsWith("#")) {
                        continue;
                    }
                    UnicodeSpec spec = UnicodeSpec.parse(line);
                    if (spec == null) {
                        continue;
                    }
                    String name = resolveName(spec);
                    if (name == null) {
                        continue;   // entry has no individual name to record
                    }
                    int cp = spec.getCodePoint();
                    int len = name.length();
                    if (len > MAX_NAME_LENGTH) {
                        // The length is stored in 8 bits; a longer name would be
                        // silently truncated and corrupt the table.
                        throw new IllegalStateException(
                            "Name of U+" + Integer.toHexString(cp) + " is " + len
                            + " chars long, limit is " + MAX_NAME_LENGTH);
                    }

                    cpNum++;
                    if (!cpBlocks[cp >> 8]) {
                        cpBlocks[cp >> 8] = true;
                        bkNum++;
                    }
                    if (cp == lastCp + 1) {
                        cpBB.put((byte)len);
                    } else {
                        cpBB.put((byte)0);  // segment start flag
                        cpBB.putInt((len << 24) | (cp & 0xffffff));
                    }
                    namePool.append(name);
                    lastCp = cp;
                }
            }

            byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
            int cpLen = cpBB.position();
            int total = cpLen + namePoolBytes.length;

            // The output file is only opened once the whole input was parsed.
            // Both streams are closed even if a write fails.
            try (FileOutputStream fos = new FileOutputStream(args[1]);
                 DataOutputStream dos = new DataOutputStream(
                                            new DeflaterOutputStream(fos))) {
                dos.writeInt(total);  // total
                dos.writeInt(bkNum);  // bkNum;
                dos.writeInt(cpNum);  // cpNum
                dos.writeInt(cpLen);  // nameOff
                dos.write(cpPoolBytes, 0, cpLen);
                dos.write(namePoolBytes);
            }
        } catch (Throwable e) {
            System.out.println("Unexpected exception:");
            e.printStackTrace();
            // Report the failure through the exit status so that the caller
            // does not mistake a missing or partial output file for success.
            System.exit(1);
        }
    }

    /**
     * Selects the name to record for one parsed entry.
     *
     * @param spec the parsed entry
     * @return the name to store, or {@code null} if the entry is to be skipped
     */
    private static String resolveName(UnicodeSpec spec) {
        int cp = spec.getCodePoint();
        String name = spec.getName();
        if (name.equals("<control>") && spec.getOldName() != null) {
            if (cp == 0x7) {    // <control>BELL -> BEL; u+1f514 <-> BELL
                return "BEL";
            }
            if (spec.getOldName().length() != 0) {
                return spec.getOldName();
            }
            /*
               3 "figment" characters from NameAliases.txt
               Several documented labels for C1 control code points which
               were never actually approved in any standard...but were
               implemented in Perl regex.
               0080;PADDING CHARACTER;figment
               0081;HIGH OCTET PRESET;figment
               0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
            */
            if (cp == 0x80) {
                return "PADDING CHARACTER";
            }
            if (cp == 0x81) {
                return "HIGH OCTET PRESET";
            }
            if (cp == 0x99) {
                return "SINGLE GRAPHIC CHARACTER INTRODUCER";
            }
            return null;
        }
        if (name.startsWith("<")) {
            /*
              3400    <CJK Ideograph Extension A, First>
              4db5    <CJK Ideograph Extension A, Last>
              4e00    <CJK Ideograph, First>
              9fc3    <CJK Ideograph, Last>
              ac00    <Hangul Syllable, First>
              d7a3    <Hangul Syllable, Last>
              d800    <Non Private Use High Surrogate, First>
              db7f    <Non Private Use High Surrogate, Last>
              db80    <Private Use High Surrogate, First>
              dbff    <Private Use High Surrogate, Last>
              dc00    <Low Surrogate, First>
              dfff    <Low Surrogate, Last>
              e000    <Private Use, First>
              f8ff    <Private Use, Last>
             20000    <CJK Ideograph Extension B, First>
             2a6d6    <CJK Ideograph Extension B, Last>
             f0000    <Plane 15 Private Use, First>
             ffffd    <Plane 15 Private Use, Last>
            */
            return null;
        }
        return name;
    }
}