Old src/java.desktop/share/classes/sun/font/CMap.java

   1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.font;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.IntBuffer;
  31 import java.util.Locale;
  32 import java.nio.charset.*;
  33 
  34 /*
  35  * A tt font has a CMAP table which is in turn made up of sub-tables which
  36  * describe the char to glyph mapping in (possibly) multiple ways.
  37  * CMAP subtables are described by 3 values.
  38  * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
  39  * 2. Encoding (eg 0=symbol, 1=unicode)
  40  * 3. TrueType subtable format (how the char->glyph mapping for the encoding
  41  * is stored in the subtable). See the TrueType spec. Format 4 is required
   42  * by MS in fonts for windows. It uses segmented mapping to delta values.
   43  * The combination we most typically see is (3,1,4):
  44  * CMAP Platform ID=3 is what we use.
  45  * Encodings that are used in practice by JDK on Solaris are
  46  *  symbol (3,0)
  47  *  unicode (3,1)
  48  *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
  49  * The format for almost all subtables is 4. However the solaris (3,5)
  50  * encodings are typically in format 2.
  51  */
  52 abstract class CMap {
  53 
  54 //     static char WingDings_b2c[] = {
  55 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  56 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  57 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  58 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  59 //         0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  60 //         0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  61 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  62 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
  63 //         0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  64 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  65 //         0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
  66 //         0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  67 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  68 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  69 //         0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
  70 //         0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
  71 //         0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
  72 //         0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
  73 //         0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
  74 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  75 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
  76 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
  77 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
  78 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  79 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
  80 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  81 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  82 //         0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
  83 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  84 //         0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  85 //         0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  86 //         0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
  87 //    };
  88 
  89 //     static char Symbols_b2c[] = {
  90 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  91 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  92 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  93 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  94 //         0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
  95 //         0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
  96 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  97 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  98 //         0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
  99 //         0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
 100 //         0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
 101 //         0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
 102 //         0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
 103 //         0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
 104 //         0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
 105 //         0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
 106 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 107 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 108 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 109 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 110 //         0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
 111 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 112 //         0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
 113 //         0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
 114 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
 115 //         0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
 116 //         0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
 117 //         0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 118 //         0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
 119 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 120 //         0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 121 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 122 //     };
 123 
    /* cmap encoding IDs for the Microsoft platform (platform ID 3).
     * The values match the encodingID cases handled in initialize().
     */
    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding      = 3;
    static final short Big5Encoding     = 4;
    static final short WansungEncoding  = 5;
    static final short JohabEncoding    = 6;
    static final short MSUnicodeSurrogateEncoding = 10;

    /* Marker used by getConverter() for a code that has no mapping. */
    static final char noSuchChar = (char)0xfffd;
    static final int SHORTMASK = 0x0000ffff;
    /* NOTE: this is an int with every bit set (ie -1), so and-ing an int
     * with it leaves the value unchanged; it does not zero-extend the
     * value when the result is widened to a long.
     */
    static final int INTMASK   = 0xffffffff;

    /* Cache of the arrays built by getConverter(), indexed by encoding ID
     * (slots 0..6) and filled lazily by getConverterMap().
     */
    static final char[][] converterMaps = new char[7][];

    /*
     * Unicode->other encoding translation array. A pre-computed look up
     * which can be shared across all fonts using that encoding.
     * Using this saves running character converters repeatedly.
     */
    char[] xlat;
 143 
 144     static CMap initialize(TrueTypeFont font) {
 145 
 146         CMap cmap = null;
 147 
 148         int offset, platformID, encodingID=-1;
 149 
 150         int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
 151             three6=0, three10=0;
 152         boolean threeStar = false;
 153 
 154         ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
 155         int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
 156         short numberSubTables = cmapBuffer.getShort(2);
 157 
 158         /* locate the offsets of all 3,*  (ie Microsoft platform) encodings */
 159         for (int i=0; i<numberSubTables; i++) {
 160             cmapBuffer.position(i * 8 + 4);
 161             platformID = cmapBuffer.getShort();
 162             if (platformID == 3) {
 163                 threeStar = true;
 164                 encodingID = cmapBuffer.getShort();
 165                 offset     = cmapBuffer.getInt();
 166                 switch (encodingID) {
 167                 case 0:  three0  = offset; break; // MS Symbol encoding
 168                 case 1:  three1  = offset; break; // MS Unicode cmap
 169                 case 2:  three2  = offset; break; // ShiftJIS cmap.
 170                 case 3:  three3  = offset; break; // GBK cmap
 171                 case 4:  three4  = offset; break; // Big 5 cmap
 172                 case 5:  three5  = offset; break; // Wansung
 173                 case 6:  three6  = offset; break; // Johab
 174                 case 10: three10 = offset; break; // MS Unicode surrogates
 175                 }
 176             }
 177         }
 178 
 179         /* This defines the preference order for cmap subtables */
 180         if (threeStar) {
 181             if (three10 != 0) {
 182                 cmap = createCMap(cmapBuffer, three10, null);
 183             }
 184             else if  (three0 != 0) {
 185                 /* The special case treatment of these fonts leads to
 186                  * anomalies where a user can view "wingdings" and "wingdings2"
 187                  * and the latter shows all its code points in the unicode
 188                  * private use area at 0xF000->0XF0FF and the former shows
 189                  * a scattered subset of its glyphs that are known mappings to
 190                  * unicode code points.
 191                  * The primary purpose of these mappings was to facilitate
 192                  * display of symbol chars etc in composite fonts, however
 193                  * this is not needed as all these code points are covered
 194                  * by Lucida Sans Regular.
 195                  * Commenting this out reduces the role of these two files
 196                  * (assuming that they continue to be used in font.properties)
 197                  * to just one of contributing to the overall composite
 198                  * font metrics, and also AWT can still access the fonts.
 199                  * Clients which explicitly accessed these fonts as names
 200                  * "Symbol" and "Wingdings" (ie as physical fonts) and
 201                  * expected to see a scattering of these characters will
 202                  * see them now as missing. How much of a problem is this?
 203                  * Perhaps we could still support this mapping just for
 204                  * "Symbol.ttf" but I suspect some users would prefer it
 205                  * to be mapped in to the Latin range as that is how
 206                  * the "symbol" font is used in native apps.
 207                  */
 208 //              String name = font.platName.toLowerCase(Locale.ENGLISH);
 209 //              if (name.endsWith("symbol.ttf")) {
 210 //                  cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
 211 //              } else if (name.endsWith("wingding.ttf")) {
 212 //                  cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
 213 //              } else {
 214                     cmap = createCMap(cmapBuffer, three0, null);
 215 //              }
 216             }
 217             else if (three1 != 0) {
 218                 cmap = createCMap(cmapBuffer, three1, null);
 219             }
 220             else if (three2 != 0) {
 221                 cmap = createCMap(cmapBuffer, three2,
 222                                   getConverterMap(ShiftJISEncoding));
 223             }
 224             else if (three3 != 0) {
 225                 cmap = createCMap(cmapBuffer, three3,
 226                                   getConverterMap(GBKEncoding));
 227             }
 228             else if (three4 != 0) {
 229                 /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
 230                  * cmap table, these fonts have EncodingID 4 which is Big5
 231                  * encoding according the TrueType spec, but actually the
 232                  * fonts are using gb2312 encoding, have to use this
 233                  * workaround to make Solaris zh_CN locale work.  -sherman
 234                  */
 235                 if (FontUtilities.isSolaris && font.platName != null &&
 236                     (font.platName.startsWith(
 237                      "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
 238                      font.platName.startsWith(
 239                      "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
 240                      font.platName.startsWith(
 241                      "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
 242                     cmap = createCMap(cmapBuffer, three4,
 243                                        getConverterMap(GBKEncoding));
 244                 }
 245                 else {
 246                     cmap = createCMap(cmapBuffer, three4,
 247                                       getConverterMap(Big5Encoding));
 248                 }
 249             }
 250             else if (three5 != 0) {
 251                 cmap = createCMap(cmapBuffer, three5,
 252                                   getConverterMap(WansungEncoding));
 253             }
 254             else if (three6 != 0) {
 255                 cmap = createCMap(cmapBuffer, three6,
 256                                   getConverterMap(JohabEncoding));
 257             }
 258         } else {
 259             /* No 3,* subtable was found. Just use whatever is the first
 260              * table listed. Not very useful but maybe better than
 261              * rejecting the font entirely?
 262              */
 263             cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
 264         }
 265         return cmap;
 266     }
 267 
 268     /* speed up the converting by setting the range for double
 269      * byte characters;
 270      */
 271     static char[] getConverter(short encodingID) {
 272         int dBegin = 0x8000;
 273         int dEnd   = 0xffff;
 274         String encoding;
 275 
 276         switch (encodingID) {
 277         case ShiftJISEncoding:
 278             dBegin = 0x8140;
 279             dEnd   = 0xfcfc;
 280             encoding = "SJIS";
 281             break;
 282         case GBKEncoding:
 283             dBegin = 0x8140;
 284             dEnd   = 0xfea0;
 285             encoding = "GBK";
 286             break;
 287         case Big5Encoding:
 288             dBegin = 0xa140;
 289             dEnd   = 0xfefe;
 290             encoding = "Big5";
 291             break;
 292         case WansungEncoding:
 293             dBegin = 0xa1a1;
 294             dEnd   = 0xfede;
 295             encoding = "EUC_KR";
 296             break;
 297         case JohabEncoding:
 298             dBegin = 0x8141;
 299             dEnd   = 0xfdfe;
 300             encoding = "Johab";
 301             break;
 302         default:
 303             return null;
 304         }
 305 
 306         try {
 307             char[] convertedChars = new char[65536];
 308             for (int i=0; i<65536; i++) {
 309                 convertedChars[i] = noSuchChar;
 310             }
 311 
 312             byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
 313             char[] outputChars = new char[(dEnd-dBegin+1)];
 314 
 315             int j = 0;
 316             int firstByte;
 317             if (encodingID == ShiftJISEncoding) {
 318                 for (int i = dBegin; i <= dEnd; i++) {
 319                     firstByte = (i >> 8 & 0xff);
 320                     if (firstByte >= 0xa1 && firstByte <= 0xdf) {
 321                         //sjis halfwidth katakana
 322                         inputBytes[j++] = (byte)0xff;
 323                         inputBytes[j++] = (byte)0xff;
 324                     } else {
 325                         inputBytes[j++] = (byte)firstByte;
 326                         inputBytes[j++] = (byte)(i & 0xff);
 327                     }
 328                 }
 329             } else {
 330                 for (int i = dBegin; i <= dEnd; i++) {
 331                     inputBytes[j++] = (byte)(i>>8 & 0xff);
 332                     inputBytes[j++] = (byte)(i & 0xff);
 333                 }
 334             }
 335 
 336             Charset.forName(encoding).newDecoder()
 337             .onMalformedInput(CodingErrorAction.REPLACE)
 338             .onUnmappableCharacter(CodingErrorAction.REPLACE)
 339             .replaceWith("\u0000")
 340             .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
 341                     CharBuffer.wrap(outputChars, 0, outputChars.length),
 342                     true);
 343 
 344             // ensure single byte ascii
 345             for (int i = 0x20; i <= 0x7e; i++) {
 346                 convertedChars[i] = (char)i;
 347             }
 348 
 349             //sjis halfwidth katakana
 350             if (encodingID == ShiftJISEncoding) {
 351                 for (int i = 0xa1; i <= 0xdf; i++) {
 352                     convertedChars[i] = (char)(i - 0xa1 + 0xff61);
 353                 }
 354             }
 355 
 356             /* It would save heap space (approx 60Kbytes for each of these
 357              * converters) if stored only valid ranges (ie returned
 358              * outputChars directly. But this is tricky since want to
 359              * include the ASCII range too.
 360              */
 361 //          System.err.println("oc.len="+outputChars.length);
 362 //          System.err.println("cc.len="+convertedChars.length);
 363 //          System.err.println("dbegin="+dBegin);
 364             System.arraycopy(outputChars, 0, convertedChars, dBegin,
 365                              outputChars.length);
 366 
 367             //return convertedChars;
 368             /* invert this map as now want it to map from Unicode
 369              * to other encoding.
 370              */
 371             char [] invertedChars = new char[65536];
 372             for (int i=0;i<65536;i++) {
 373                 if (convertedChars[i] != noSuchChar) {
 374                     invertedChars[convertedChars[i]] = (char)i;
 375                 }
 376             }
 377             return invertedChars;
 378 
 379         } catch (Exception e) {
 380             e.printStackTrace();
 381         }
 382         return null;
 383     }
 384 
 385     /*
 386      * The returned array maps to unicode from some other 2 byte encoding
 387      * eg for a 2byte index which represents a SJIS char, the indexed
 388      * value is the corresponding unicode char.
 389      */
 390     static char[] getConverterMap(short encodingID) {
 391         if (converterMaps[encodingID] == null) {
 392            converterMaps[encodingID] = getConverter(encodingID);
 393         }
 394         return converterMaps[encodingID];
 395     }
 396 
 397 
 398     static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
 399         /* First do a sanity check that this cmap subtable is contained
 400          * within the cmap table.
 401          */
 402         int subtableFormat = buffer.getChar(offset);
 403         long subtableLength;
 404         if (subtableFormat < 8) {
 405             subtableLength = buffer.getChar(offset+2);
 406         } else {
 407             subtableLength = buffer.getInt(offset+4) & INTMASK;
 408         }
 409         if (offset+subtableLength > buffer.capacity()) {
 410             if (FontUtilities.isLogging()) {
 411                 FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
 412             }
 413         }
 414         switch (subtableFormat) {
 415         case 0:  return new CMapFormat0(buffer, offset);
 416         case 2:  return new CMapFormat2(buffer, offset, xlat);
 417         case 4:  return new CMapFormat4(buffer, offset, xlat);
 418         case 6:  return new CMapFormat6(buffer, offset, xlat);
 419         case 8:  return new CMapFormat8(buffer, offset, xlat);
 420         case 10: return new CMapFormat10(buffer, offset, xlat);
 421         case 12: return new CMapFormat12(buffer, offset, xlat);
 422         default: throw new RuntimeException("Cmap format unimplemented: " +
 423                                             (int)buffer.getChar(offset));
 424         }
 425     }
 426 
 427 /*
 428     final char charVal(byte[] cmap, int index) {
 429         return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
 430     }
 431 
 432     final short shortVal(byte[] cmap, int index) {
 433         return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
 434     }
 435 */
 436     abstract char getGlyph(int charCode);
 437 
 438     /* Format 4 Header is
 439      * ushort format (off=0)
 440      * ushort length (off=2)
 441      * ushort language (off=4)
 442      * ushort segCountX2 (off=6)
 443      * ushort searchRange (off=8)
 444      * ushort entrySelector (off=10)
 445      * ushort rangeShift (off=12)
 446      * ushort endCount[segCount] (off=14)
 447      * ushort reservedPad
 448      * ushort startCount[segCount]
 449      * short idDelta[segCount]
 450      * idRangeOFfset[segCount]
 451      * ushort glyphIdArray[]
 452      */
 453     static class CMapFormat4 extends CMap {
 454         int segCount;
 455         int entrySelector;
 456         int rangeShift;
 457         char[] endCount;
 458         char[] startCount;
 459         short[] idDelta;
 460         char[] idRangeOffset;
 461         char[] glyphIds;
 462 
 463         CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
 464 
 465             this.xlat = xlat;
 466 
 467             bbuffer.position(offset);
 468             CharBuffer buffer = bbuffer.asCharBuffer();
 469             buffer.get(); // skip, we already know format=4
 470             int subtableLength = buffer.get();
 471             /* Try to recover from some bad fonts which specify a subtable
 472              * length that would overflow the byte buffer holding the whole
 473              * cmap table. If this isn't a recoverable situation an exception
 474              * may be thrown which is caught higher up the call stack.
 475              * Whilst this may seem lenient, in practice, unless the "bad"
 476              * subtable we are using is the last one in the cmap table we
 477              * would have no way of knowing about this problem anyway.
 478              */
 479             if (offset+subtableLength > bbuffer.capacity()) {
 480                 subtableLength = bbuffer.capacity() - offset;
 481             }
 482             buffer.get(); // skip language
 483             segCount = buffer.get()/2;
 484             int searchRange = buffer.get();
 485             entrySelector = buffer.get();
 486             rangeShift    = buffer.get()/2;
 487             startCount = new char[segCount];
 488             endCount = new char[segCount];
 489             idDelta = new short[segCount];
 490             idRangeOffset = new char[segCount];
 491 
 492             for (int i=0; i<segCount; i++) {
 493                 endCount[i] = buffer.get();
 494             }
 495             buffer.get(); // 2 bytes for reserved pad
 496             for (int i=0; i<segCount; i++) {
 497                 startCount[i] = buffer.get();
 498             }
 499 
 500             for (int i=0; i<segCount; i++) {
 501                 idDelta[i] = (short)buffer.get();
 502             }
 503 
 504             for (int i=0; i<segCount; i++) {
 505                 char ctmp = buffer.get();
 506                 idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
 507             }
 508             /* Can calculate the number of glyph IDs by subtracting
 509              * "pos" from the length of the cmap
 510              */
 511             int pos = (segCount*8+16)/2;
 512             buffer.position(pos);
 513             int numGlyphIds = (subtableLength/2 - pos);
 514             glyphIds = new char[numGlyphIds];
 515             for (int i=0;i<numGlyphIds;i++) {
 516                 glyphIds[i] = buffer.get();
 517             }
 518 /*
 519             System.err.println("segcount="+segCount);
 520             System.err.println("entrySelector="+entrySelector);
 521             System.err.println("rangeShift="+rangeShift);
 522             for (int j=0;j<segCount;j++) {
 523               System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
 524                                  " ec="+(int)(endCount[j]&0xffff)+
 525                                  " delta="+idDelta[j] +
 526                                  " ro="+(int)idRangeOffset[j]);
 527             }
 528 
 529             //System.err.println("numglyphs="+glyphIds.length);
 530             for (int i=0;i<numGlyphIds;i++) {
 531                   System.err.println("gid["+i+"]="+(int)glyphIds[i]);
 532             }
 533 */
 534         }
 535 
 536         char getGlyph(int charCode) {
 537 
 538             int index = 0;
 539             char glyphCode = 0;
 540 
 541             int controlGlyph = getControlCodeGlyph(charCode, true);
 542             if (controlGlyph >= 0) {
 543                 return (char)controlGlyph;
 544             }
 545 
 546             /* presence of translation array indicates that this
 547              * cmap is in some other (non-unicode encoding).
 548              * In order to look-up a char->glyph mapping we need to
 549              * translate the unicode code point to the encoding of
 550              * the cmap.
 551              * REMIND: VALID CHARCODES??
 552              */
 553             if (xlat != null) {
 554                 charCode = xlat[charCode];
 555             }
 556 
 557             /*
 558              * Citation from the TrueType (and OpenType) spec:
 559              *   The segments are sorted in order of increasing endCode
 560              *   values, and the segment values are specified in four parallel
 561              *   arrays. You search for the first endCode that is greater than
 562              *   or equal to the character code you want to map. If the
 563              *   corresponding startCode is less than or equal to the
 564              *   character code, then you use the corresponding idDelta and
 565              *   idRangeOffset to map the character code to a glyph index
 566              *   (otherwise, the missingGlyph is returned).
 567              */
 568 
 569             /*
 570              * CMAP format4 defines several fields for optimized search of
 571              * the segment list (entrySelector, searchRange, rangeShift).
 572              * However, benefits are neglible and some fonts have incorrect
 573              * data - so we use straightforward binary search (see bug 6247425)
 574              */
 575             int left = 0, right = startCount.length;
 576             index = startCount.length >> 1;
 577             while (left < right) {
 578                 if (endCount[index] < charCode) {
 579                     left = index + 1;
 580                 } else {
 581                     right = index;
 582                 }
 583                 index = (left + right) >> 1;
 584             }
 585 
 586             if (charCode >= startCount[index] && charCode <= endCount[index]) {
 587                 int rangeOffset = idRangeOffset[index];
 588 
 589                 if (rangeOffset == 0) {
 590                     glyphCode = (char)(charCode + idDelta[index]);
 591                 } else {
 592                     /* Calculate an index into the glyphIds array */
 593 
 594 /*
 595                     System.err.println("rangeoffset="+rangeOffset+
 596                                        " charCode=" + charCode +
 597                                        " scnt["+index+"]="+(int)startCount[index] +
 598                                        " segCnt="+segCount);
 599 */
 600 
 601                     int glyphIDIndex = rangeOffset - segCount + index
 602                                          + (charCode - startCount[index]);
 603                     glyphCode = glyphIds[glyphIDIndex];
 604                     if (glyphCode != 0) {
 605                         glyphCode = (char)(glyphCode + idDelta[index]);
 606                     }
 607                 }
 608             }
 609             if (glyphCode != 0) {
 610             //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
 611             }
 612             return glyphCode;
 613         }
 614     }
 615 
 616     // Format 0: Byte Encoding table
 617     static class CMapFormat0 extends CMap {
 618         byte [] cmap;
 619 
 620         CMapFormat0(ByteBuffer buffer, int offset) {
 621 
 622             /* skip 6 bytes of format, length, and version */
 623             int len = buffer.getChar(offset+2);
 624             cmap = new byte[len-6];
 625             buffer.position(offset+6);
 626             buffer.get(cmap);
 627         }
 628 
 629         char getGlyph(int charCode) {
 630             if (charCode < 256) {
 631                 if (charCode < 0x0010) {
 632                     switch (charCode) {
 633                     case 0x0009:
 634                     case 0x000a:
 635                     case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
 636                     }
 637                 }
 638                 return (char)(0xff & cmap[charCode]);
 639             } else {
 640                 return 0;
 641             }
 642         }
 643     }
 644 
 645 //     static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
 646 
 647 //      CMap cmap = createCMap(buffer, offset, null);
 648 //      if (cmap == null) {
 649 //          return null;
 650 //      } else {
 651 //          return new CMapFormatSymbol(cmap, syms);
 652 //      }
 653 //     }
 654 
 655 //     static class CMapFormatSymbol extends CMap {
 656 
 657 //      CMap cmap;
 658 //      static final int NUM_BUCKETS = 128;
 659 //      Bucket[] buckets = new Bucket[NUM_BUCKETS];
 660 
 661 //      class Bucket {
 662 //          char unicode;
 663 //          char glyph;
 664 //          Bucket next;
 665 
 666 //          Bucket(char u, char g) {
 667 //              unicode = u;
 668 //              glyph = g;
 669 //          }
 670 //      }
 671 
 672 //      CMapFormatSymbol(CMap cmap, char[] syms) {
 673 
 674 //          this.cmap = cmap;
 675 
 676 //          for (int i=0;i<syms.length;i++) {
 677 //              char unicode = syms[i];
 678 //              if (unicode != noSuchChar) {
 679 //                  char glyph = cmap.getGlyph(i + 0xf000);
 680 //                  int hash = unicode % NUM_BUCKETS;
 681 //                  Bucket bucket = new Bucket(unicode, glyph);
 682 //                  if (buckets[hash] == null) {
 683 //                      buckets[hash] = bucket;
 684 //                  } else {
 685 //                      Bucket b = buckets[hash];
 686 //                      while (b.next != null) {
 687 //                          b = b.next;
 688 //                      }
 689 //                      b.next = bucket;
 690 //                  }
 691 //              }
 692 //          }
 693 //      }
 694 
 695 //      char getGlyph(int unicode) {
 696 //          if (unicode >= 0x1000) {
 697 //              return 0;
 698 //          }
 699 //          else if (unicode >=0xf000 && unicode < 0xf100) {
 700 //              return cmap.getGlyph(unicode);
 701 //          } else {
 702 //              Bucket b = buckets[unicode % NUM_BUCKETS];
 703 //              while (b != null) {
 704 //                  if (b.unicode == unicode) {
 705 //                      return b.glyph;
 706 //                  } else {
 707 //                      b = b.next;
 708 //                  }
 709 //              }
 710 //              return 0;
 711 //          }
 712 //      }
 713 //     }
 714 
 715     // Format 2: High-byte mapping through table
 716     static class CMapFormat2 extends CMap {
 717 
 718         char[] subHeaderKey = new char[256];
 719          /* Store subheaders in individual arrays
 720           * A SubHeader entry theortically looks like {
 721           *   char firstCode;
 722           *   char entryCount;
 723           *   short idDelta;
 724           *   char idRangeOffset;
 725           * }
 726           */
 727         char[] firstCodeArray;
 728         char[] entryCountArray;
 729         short[] idDeltaArray;
 730         char[] idRangeOffSetArray;
 731 
 732         char[] glyphIndexArray;
 733 
 734         CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
 735 
 736             this.xlat = xlat;
 737 
 738             int tableLen = buffer.getChar(offset+2);
 739             buffer.position(offset+6);
 740             CharBuffer cBuffer = buffer.asCharBuffer();
 741             char maxSubHeader = 0;
 742             for (int i=0;i<256;i++) {
 743                 subHeaderKey[i] = cBuffer.get();
 744                 if (subHeaderKey[i] > maxSubHeader) {
 745                     maxSubHeader = subHeaderKey[i];
 746                 }
 747             }
 748             /* The value of the subHeaderKey is 8 * the subHeader index,
 749              * so the number of subHeaders can be obtained by dividing
 750              * this value bv 8 and adding 1.
 751              */
 752             int numSubHeaders = (maxSubHeader >> 3) +1;
 753             firstCodeArray = new char[numSubHeaders];
 754             entryCountArray = new char[numSubHeaders];
 755             idDeltaArray  = new short[numSubHeaders];
 756             idRangeOffSetArray  = new char[numSubHeaders];
 757             for (int i=0; i<numSubHeaders; i++) {
 758                 firstCodeArray[i] = cBuffer.get();
 759                 entryCountArray[i] = cBuffer.get();
 760                 idDeltaArray[i] = (short)cBuffer.get();
 761                 idRangeOffSetArray[i] = cBuffer.get();
 762 //              System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
 763 //                                 " ec="+(int)entryCountArray[i]+
 764 //                                 " delta="+(int)idDeltaArray[i]+
 765 //                                 " offset="+(int)idRangeOffSetArray[i]);
 766             }
 767 
 768             int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
 769             glyphIndexArray = new char[glyphIndexArrSize];
 770             for (int i=0; i<glyphIndexArrSize;i++) {
 771                 glyphIndexArray[i] = cBuffer.get();
 772             }
 773         }
 774 
 775         char getGlyph(int charCode) {
 776             int controlGlyph = getControlCodeGlyph(charCode, true);
 777             if (controlGlyph >= 0) {
 778                 return (char)controlGlyph;
 779             }
 780 
 781             if (xlat != null) {
 782                 charCode = xlat[charCode];
 783             }
 784 
 785             char highByte = (char)(charCode >> 8);
 786             char lowByte = (char)(charCode & 0xff);
 787             int key = subHeaderKey[highByte]>>3; // index into subHeaders
 788             char mapMe;
 789 
 790             if (key != 0) {
 791                 mapMe = lowByte;
 792             } else {
 793                 mapMe = highByte;
 794                 if (mapMe == 0) {
 795                     mapMe = lowByte;
 796                 }
 797             }
 798 
 799 //          System.err.println("charCode="+Integer.toHexString(charCode)+
 800 //                             " key="+key+ " mapMe="+Integer.toHexString(mapMe));
 801             char firstCode = firstCodeArray[key];
 802             if (mapMe < firstCode) {
 803                 return 0;
 804             } else {
 805                 mapMe -= firstCode;
 806             }
 807 
 808             if (mapMe < entryCountArray[key]) {
 809                 /* "address" arithmetic is needed to calculate the offset
 810                  * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
 811                  * the number of bytes from that location in the table where
 812                  * the subarray of glyphIndexes starting at "firstCode" begins.
 813                  * Each entry in the subHeader table is 8 bytes, and the
 814                  * idRangeOffSetArray field is at offset 6 in the entry.
 815                  * The glyphIndexArray immediately follows the subHeaders.
 816                  * So if there are "N" entries then the number of bytes to the
 817                  * start of glyphIndexArray is (N-key)*8-6.
 818                  * Subtract this from the idRangeOffSetArray value to get
 819                  * the number of bytes into glyphIndexArray and divide by 2 to
 820                  * get the (char) array index.
 821                  */
 822                 int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
 823                 int glyphSubArrayStart =
 824                         (idRangeOffSetArray[key] - glyphArrayOffset)/2;
 825                 char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
 826                 if (glyphCode != 0) {
 827                     glyphCode += idDeltaArray[key]; //idDelta
 828                     return glyphCode;
 829                 }
 830             }
 831             return 0;
 832         }
 833     }
 834 
 835     // Format 6: Trimmed table mapping
 836     static class CMapFormat6 extends CMap {
 837 
 838         char firstCode;
 839         char entryCount;
 840         char[] glyphIdArray;
 841 
 842         CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
 843 
 844              bbuffer.position(offset+6);
 845              CharBuffer buffer = bbuffer.asCharBuffer();
 846              firstCode = buffer.get();
 847              entryCount = buffer.get();
 848              glyphIdArray = new char[entryCount];
 849              for (int i=0; i< entryCount; i++) {
 850                  glyphIdArray[i] = buffer.get();
 851              }
 852          }
 853 
 854          char getGlyph(int charCode) {
 855             int controlGlyph = getControlCodeGlyph(charCode, true);
 856             if (controlGlyph >= 0) {
 857                 return (char)controlGlyph;
 858             }
 859 
 860              if (xlat != null) {
 861                  charCode = xlat[charCode];
 862              }
 863 
 864              charCode -= firstCode;
 865              if (charCode < 0 || charCode >= entryCount) {
 866                   return 0;
 867              } else {
 868                   return glyphIdArray[charCode];
 869              }
 870          }
 871     }
 872 
 873     // Format 8: mixed 16-bit and 32-bit coverage
 874     // Seems unlikely this code will ever get tested as we look for
 875     // MS platform Cmaps and MS states (in the Opentype spec on their website)
 876     // that MS doesn't support this format
 877     static class CMapFormat8 extends CMap {
 878          byte[] is32 = new byte[8192];
 879          int nGroups;
 880          int[] startCharCode;
 881          int[] endCharCode;
 882          int[] startGlyphID;
 883 
 884          CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
 885 
 886              bbuffer.position(12);
 887              bbuffer.get(is32);
 888              nGroups = bbuffer.getInt();
 889              startCharCode = new int[nGroups];
 890              endCharCode   = new int[nGroups];
 891              startGlyphID  = new int[nGroups];
 892          }
 893 
 894         char getGlyph(int charCode) {
 895             if (xlat != null) {
 896                 throw new RuntimeException("xlat array for cmap fmt=8");
 897             }
 898             return 0;
 899         }
 900 
 901     }
 902 
 903 
 904     // Format 4-byte 10: Trimmed table mapping
 905     // Seems unlikely this code will ever get tested as we look for
 906     // MS platform Cmaps and MS states (in the Opentype spec on their website)
 907     // that MS doesn't support this format
 908     static class CMapFormat10 extends CMap {
 909 
 910          long firstCode;
 911          int entryCount;
 912          char[] glyphIdArray;
 913 
 914          CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
 915 
 916              firstCode = bbuffer.getInt() & INTMASK;
 917              entryCount = bbuffer.getInt() & INTMASK;
 918              bbuffer.position(offset+20);
 919              CharBuffer buffer = bbuffer.asCharBuffer();
 920              glyphIdArray = new char[entryCount];
 921              for (int i=0; i< entryCount; i++) {
 922                  glyphIdArray[i] = buffer.get();
 923              }
 924          }
 925 
 926          char getGlyph(int charCode) {
 927 
 928              if (xlat != null) {
 929                  throw new RuntimeException("xlat array for cmap fmt=10");
 930              }
 931 
 932              int code = (int)(charCode - firstCode);
 933              if (code < 0 || code >= entryCount) {
 934                  return 0;
 935              } else {
 936                  return glyphIdArray[code];
 937              }
 938          }
 939     }
 940 
 941     // Format 12: Segmented coverage for UCS-4 (fonts supporting
 942     // surrogate pairs)
 943     static class CMapFormat12 extends CMap {
 944 
 945         int numGroups;
 946         int highBit =0;
 947         int power;
 948         int extra;
 949         long[] startCharCode;
 950         long[] endCharCode;
 951         int[] startGlyphID;
 952 
 953         CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
 954             if (xlat != null) {
 955                 throw new RuntimeException("xlat array for cmap fmt=12");
 956             }
 957 
 958             numGroups = buffer.getInt(offset+12);
 959             startCharCode = new long[numGroups];
 960             endCharCode = new long[numGroups];
 961             startGlyphID = new int[numGroups];
 962             buffer.position(offset+16);
 963             buffer = buffer.slice();
 964             IntBuffer ibuffer = buffer.asIntBuffer();
 965             for (int i=0; i<numGroups; i++) {
 966                 startCharCode[i] = ibuffer.get() & INTMASK;
 967                 endCharCode[i] = ibuffer.get() & INTMASK;
 968                 startGlyphID[i] = ibuffer.get() & INTMASK;
 969             }
 970 
 971             /* Finds the high bit by binary searching through the bits */
 972             int value = numGroups;
 973 
 974             if (value >= 1 << 16) {
 975                 value >>= 16;
 976                 highBit += 16;
 977             }
 978 
 979             if (value >= 1 << 8) {
 980                 value >>= 8;
 981                 highBit += 8;
 982             }
 983 
 984             if (value >= 1 << 4) {
 985                 value >>= 4;
 986                 highBit += 4;
 987             }
 988 
 989             if (value >= 1 << 2) {
 990                 value >>= 2;
 991                 highBit += 2;
 992             }
 993 
 994             if (value >= 1 << 1) {
 995                 value >>= 1;
 996                 highBit += 1;
 997             }
 998 
 999             power = 1 << highBit;
1000             extra = numGroups - power;
1001         }
1002 
1003         char getGlyph(int charCode) {
1004             int controlGlyph = getControlCodeGlyph(charCode, false);
1005             if (controlGlyph >= 0) {
1006                 return (char)controlGlyph;
1007             }
1008             int probe = power;
1009             int range = 0;
1010 
1011             if (startCharCode[extra] <= charCode) {
1012                 range = extra;
1013             }
1014 
1015             while (probe > 1) {
1016                 probe >>= 1;
1017 
1018                 if (startCharCode[range+probe] <= charCode) {
1019                     range += probe;
1020                 }
1021             }
1022 
1023             if (startCharCode[range] <= charCode &&
1024                   endCharCode[range] >= charCode) {
1025                 return (char)
1026                     (startGlyphID[range] + (charCode - startCharCode[range]));
1027             }
1028 
1029             return 0;
1030         }
1031 
1032     }
1033 
1034     /* Used to substitute for bad Cmaps. */
1035     static class NullCMapClass extends CMap {
1036 
1037         char getGlyph(int charCode) {
1038             return 0;
1039         }
1040     }
1041 
1042     public static final NullCMapClass theNullCmap = new NullCMapClass();
1043 
1044     final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1045         if (charCode < 0x0010) {
1046             switch (charCode) {
1047             case 0x0009:
1048             case 0x000a:
1049             case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1050             }
1051         } else if (charCode >= 0x200c) {
1052             if ((charCode <= 0x200f) ||
1053                 (charCode >= 0x2028 && charCode <= 0x202e) ||
1054                 (charCode >= 0x206a && charCode <= 0x206f)) {
1055                 return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1056             } else if (noSurrogates && charCode >= 0xFFFF) {
1057                 return 0;
1058             }
1059         }
1060         return -1;
1061     }
1062 }