make/src/classes/build/tools/generatecharacter/CharacterName.java
Print this page
@@ -12,17 +12,18 @@
try {
if (args.length != 2) {
System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
System.exit(1);
}
-
reader = new FileReader(args[0]);
BufferedReader bfr = new BufferedReader(reader);
String line = null;
StringBuilder namePool = new StringBuilder();
byte[] cpPoolBytes = new byte[0x100000];
+ boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8];
+ int bkNum = 0;
ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
int lastCp = 0;
int cpNum = 0;
while ((line = bfr.readLine()) != null) {
@@ -30,14 +31,30 @@
continue;
UnicodeSpec spec = UnicodeSpec.parse(line);
if (spec != null) {
int cp = spec.getCodePoint();
String name = spec.getName();
- cpNum++;
if (name.equals("<control>") && spec.getOldName() != null) {
- if (spec.getOldName().length() != 0)
+ if (cp == 0x7) // U+0007 <control>: old name BELL is now taken by U+1F514, so use BEL
+ name = "BEL";
+ else if (spec.getOldName().length() != 0)
name = spec.getOldName();
+ /*
+ Three "figment" aliases from NameAliases.txt:
+ documented labels for C1 control code points that were
+ never actually approved in any standard, but were
+ implemented in Perl regex.
+ 0080;PADDING CHARACTER;figment
+ 0081;HIGH OCTET PRESET;figment
+ 0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
+ */
+ else if (cp == 0x80)
+ name = "PADDING CHARACTER";
+ else if (cp == 0x81)
+ name = "HIGH OCTET PRESET";
+ else if (cp == 0x99)
+ name = "SINGLE GRAPHIC CHARACTER INTRODUCER";
else
continue;
} else if (name.startsWith("<")) {
/*
3400 <CJK Ideograph Extension A, First>
@@ -59,11 +76,15 @@
f0000 <Plane 15 Private Use, First>
ffffd <Plane 15 Private Use, Last>
*/
continue;
}
-
+ cpNum++;
+ if (!cpBlocks[cp >> 8]) {
+ cpBlocks[cp >> 8] = true;
+ bkNum++;
+ }
if (cp == lastCp + 1) {
cpBB.put((byte)name.length());
} else {
cpBB.put((byte)0); // segment start flag
cpBB.putInt((name.length() << 24) | (cp & 0xffffff));
@@ -74,15 +95,16 @@
}
byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
int cpLen = cpBB.position();
int total = cpLen + namePoolBytes.length;
-
DataOutputStream dos = new DataOutputStream(
new DeflaterOutputStream(
new FileOutputStream(args[1])));
dos.writeInt(total); // total
+ dos.writeInt(bkNum); // bkNum: count of 256-code-point blocks holding a named cp
+ dos.writeInt(cpNum); // cpNum
dos.writeInt(cpLen); // nameOff
dos.write(cpPoolBytes, 0, cpLen);
dos.write(namePoolBytes);
dos.close();