1 /*
   2  * Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatecharacter;
  27 
  28 import java.io.*;
  29 import java.nio.*;
  30 import java.util.*;
  31 import java.util.zip.*;
  32 
  33 public class CharacterName {
  34 
  35     public static void main(String[] args) {
  36         FileReader reader = null;
  37         try {
  38             if (args.length != 2) {
  39                 System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
  40                 System.exit(1);
  41             }
  42             reader = new FileReader(args[0]);
  43             BufferedReader bfr = new BufferedReader(reader);
  44             String line = null;
  45 
  46             StringBuilder namePool = new StringBuilder();
  47             byte[] cpPoolBytes = new byte[0x100000];
  48             boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8];
  49             int bkNum = 0;
  50             ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
  51             int lastCp = 0;
  52             int cpNum = 0;
  53 
  54             while ((line = bfr.readLine()) != null) {
  55                 if (line.startsWith("#"))
  56                     continue;
  57                 UnicodeSpec spec = UnicodeSpec.parse(line);
  58                 if (spec != null) {
  59                     int cp = spec.getCodePoint();
  60                     String name = spec.getName();
  61                     if (name.equals("<control>") && spec.getOldName() != null) {
  62                         if (cp == 0x7)  // <control>BELL -> BEL; u+1f514 <-> BELL
  63                             name = "BEL";
  64                         else if (spec.getOldName().length() != 0)
  65                             name = spec.getOldName();
  66                         /*
  67                            3 "figment" characters from NameAliases.txt
  68                            Several documented labels for C1 control code points which
  69                            were never actually approved in any standard...but were
  70                            implemented in Perl regex.
  71                            0080;PADDING CHARACTER;figment
  72                            0081;HIGH OCTET PRESET;figment
  73                            0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
  74                         */
  75                         else if (cp == 0x80)
  76                             name = "PADDING CHARACTER";
  77                         else if (cp == 0x81)
  78                             name = "HIGH OCTET PRESET";
  79                         else if (cp == 0x99)
  80                             name = "SINGLE GRAPHIC CHARACTER INTRODUCER";
  81                         else
  82                             continue;
  83                     } else if (name.startsWith("<")) {
  84                         /*
  85                           3400    <CJK Ideograph Extension A, First>
  86                           4db5    <CJK Ideograph Extension A, Last>
  87                           4e00    <CJK Ideograph, First>
  88                           9fc3    <CJK Ideograph, Last>
  89                           ac00    <Hangul Syllable, First>
  90                           d7a3    <Hangul Syllable, Last>
  91                           d800    <Non Private Use High Surrogate, First>
  92                           db7f    <Non Private Use High Surrogate, Last>
  93                           db80    <Private Use High Surrogate, First>
  94                           dbff    <Private Use High Surrogate, Last>
  95                           dc00    <Low Surrogate, First>
  96                           dfff    <Low Surrogate, Last>
  97                           e000    <Private Use, First>
  98                           f8ff    <Private Use, Last>
  99                          20000    <CJK Ideograph Extension B, First>
 100                          2a6d6    <CJK Ideograph Extension B, Last>
 101                          f0000    <Plane 15 Private Use, First>
 102                          ffffd    <Plane 15 Private Use, Last>
 103                         */
 104                         continue;
 105                     }
 106                     cpNum++;
 107                     if (!cpBlocks[cp >> 8]) {
 108                         cpBlocks[cp >> 8] = true;
 109                         bkNum++;
 110                     }
 111                     if (cp == lastCp + 1) {
 112                         cpBB.put((byte)name.length());
 113                     } else {
 114                         cpBB.put((byte)0);  // segment start flag
 115                         cpBB.putInt((name.length() << 24) | (cp & 0xffffff));
 116                     }
 117                     namePool.append(name);
 118                     lastCp = cp;
 119                 }
 120             }
 121 
 122             byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
 123             int cpLen = cpBB.position();
 124             int total = cpLen + namePoolBytes.length;
 125             DataOutputStream dos = new DataOutputStream(
 126                                        new DeflaterOutputStream(
 127                                            new FileOutputStream(args[1])));
 128             dos.writeInt(total);  // total
 129             dos.writeInt(bkNum);  // bkNum;
 130             dos.writeInt(cpNum);  // cpNum
 131             dos.writeInt(cpLen);  // nameOff
 132             dos.write(cpPoolBytes, 0, cpLen);
 133             dos.write(namePoolBytes);
 134             dos.close();
 135 
 136         } catch (Throwable e) {
 137             System.out.println("Unexpected exception:");
 138             e.printStackTrace();
 139         } finally {
 140             if (reader != null) {
 141                 try {
 142                     reader.close();
 143                 } catch (Throwable ee) { ee.printStackTrace(); }
 144             }
 145         }
 146     }
 147 }