/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.font;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.*;
import java.util.Arrays;
import java.util.Locale;

/*
 * A tt font has a CMAP table which is in turn made up of sub-tables which
 * describe the char to glyph mapping in (possibly) multiple ways.
 * CMAP subtables are described by 3 values.
 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
 * 2. Encoding (eg 0=symbol, 1=unicode)
 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
 * is stored in the subtable). See the TrueType spec. Format 4 is required
 * by MS in fonts for windows. It uses segmented mapping to delta values.
 * The combination most typically seen is (3,1,4) :
 * CMAP Platform ID=3 is what we use.
 * Encodings that are used in practice by JDK on Solaris are
 *  symbol (3,0)
 *  unicode (3,1)
 *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
 * The format for almost all subtables is 4. However the solaris (3,5)
 * encodings are typically in format 2.
 */
abstract class CMap {

    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding      = 3;
    static final short Big5Encoding     = 4;
    static final short WansungEncoding  = 5;
    static final short JohabEncoding    = 6;
    static final short MSUnicodeSurrogateEncoding = 10;

    static final char noSuchChar = (char)0xfffd;
    static final int SHORTMASK = 0x0000ffff;
    /* NB: this is an int with all bits set (-1), so "x & INTMASK" is a no-op
     * on an int and does NOT produce an unsigned value when widened to long.
     * It is retained only because other classes in the package may use it.
     */
    static final int INTMASK   = 0xffffffff;

    /* Mask that really does widen a 32 bit value to an unsigned long. */
    private static final long UINT_MASK = 0xffffffffL;

    /* Cache of translation arrays, indexed by encoding ID (see constants). */
    static final char[][] converterMaps = new char[7][];

    /*
     * Unicode->other encoding translation array. A pre-computed look up
     * which can be shared across all fonts using that encoding.
     * Using this saves running character converters repeatedly.
     */
    char[] xlat;

    /*
     * Selects one cmap subtable of the font and wraps it in a CMap.
     * Microsoft platform (3,*) subtables are preferred, in the order
     * 10, 0, 1, 2, 3, 4, 5, 6; if the font has no (3,*) subtable at all
     * the first subtable listed is used.
     * May return null if (3,*) subtables exist but none has one of the
     * encoding IDs handled here. Exceptions thrown while reading a
     * malformed table are propagated to the caller.
     */
    static CMap initialize(TrueTypeFont font) {

        CMap cmap = null;

        int offset, platformID, encodingID = -1;

        int three0 = 0, three1 = 0, three2 = 0, three3 = 0, three4 = 0,
            three5 = 0, three6 = 0, three10 = 0;
        boolean threeStar = false;

        ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
        // NOTE(review): despite its name this holds the table *size* and it
        // is never read. The call is kept in case it has side effects that
        // are not visible from this file - confirm, then remove.
        int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
        short numberSubTables = cmapBuffer.getShort(2);

        /* locate the offsets of all 3,*  (ie Microsoft platform) encodings.
         * Each encoding record is 8 bytes (platform, encoding, offset) and
         * the records start 4 bytes into the table.
         */
        for (int i = 0; i < numberSubTables; i++) {
            cmapBuffer.position(i * 8 + 4);
            platformID = cmapBuffer.getShort();
            if (platformID == 3) {
                threeStar = true;
                encodingID = cmapBuffer.getShort();
                offset     = cmapBuffer.getInt();
                switch (encodingID) {
                case 0:  three0  = offset; break; // MS Symbol encoding
                case 1:  three1  = offset; break; // MS Unicode cmap
                case 2:  three2  = offset; break; // ShiftJIS cmap.
                case 3:  three3  = offset; break; // GBK cmap
                case 4:  three4  = offset; break; // Big 5 cmap
                case 5:  three5  = offset; break; // Wansung
                case 6:  three6  = offset; break; // Johab
                case 10: three10 = offset; break; // MS Unicode surrogates
                }
            }
        }

        /* This defines the preference order for cmap subtables */
        if (threeStar) {
            if (three10 != 0) {
                cmap = createCMap(cmapBuffer, three10, null);
            }
            else if (three0 != 0) {
                /* Symbol encoded fonts used to be special cased here:
                 * "symbol.ttf" and "wingding.ttf" were given hand written
                 * tables that mapped a scattering of their glyphs to
                 * unicode code points.
                 * That special case treatment led to anomalies where a
                 * user could view "wingdings" and "wingdings2" and the
                 * latter showed all its code points in the unicode
                 * private use area at 0xF000->0XF0FF and the former showed
                 * a scattered subset of its glyphs that are known mappings
                 * to unicode code points.
                 * The primary purpose of these mappings was to facilitate
                 * display of symbol chars etc in composite fonts, however
                 * this is not needed as all these code points are covered
                 * by Lucida Sans Regular.
                 * Dropping the special case reduces the role of these two
                 * files (assuming that they continue to be used in
                 * font.properties) to just one of contributing to the
                 * overall composite font metrics, and also AWT can still
                 * access the fonts.
                 * Clients which explicitly accessed these fonts as names
                 * "Symbol" and "Wingdings" (ie as physical fonts) and
                 * expected to see a scattering of these characters will
                 * see them now as missing. How much of a problem is this?
                 * Perhaps we could still support this mapping just for
                 * "Symbol.ttf" but I suspect some users would prefer it
                 * to be mapped in to the Latin range as that is how
                 * the "symbol" font is used in native apps.
                 */
                cmap = createCMap(cmapBuffer, three0, null);
            }
            else if (three1 != 0) {
                cmap = createCMap(cmapBuffer, three1, null);
            }
            else if (three2 != 0) {
                cmap = createCMap(cmapBuffer, three2,
                                  getConverterMap(ShiftJISEncoding));
            }
            else if (three3 != 0) {
                cmap = createCMap(cmapBuffer, three3,
                                  getConverterMap(GBKEncoding));
            }
            else if (three4 != 0) {
                /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
                 * cmap table, these fonts have EncodingID 4 which is Big5
                 * encoding according the TrueType spec, but actually the
                 * fonts are using gb2312 encoding, have to use this
                 * workaround to make Solaris zh_CN locale work.  -sherman
                 */
                if (FontUtilities.isSolaris && font.platName != null &&
                    (font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
                    cmap = createCMap(cmapBuffer, three4,
                                      getConverterMap(GBKEncoding));
                }
                else {
                    cmap = createCMap(cmapBuffer, three4,
                                      getConverterMap(Big5Encoding));
                }
            }
            else if (three5 != 0) {
                cmap = createCMap(cmapBuffer, three5,
                                  getConverterMap(WansungEncoding));
            }
            else if (three6 != 0) {
                cmap = createCMap(cmapBuffer, three6,
                                  getConverterMap(JohabEncoding));
            }
        } else {
            /* No 3,* subtable was found. Just use whatever is the first
             * table listed. Not very useful but maybe better than
             * rejecting the font entirely?
             */
            cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
        }
        return cmap;
    }

    /*
     * Builds the Unicode->other encoding translation array for one of the
     * double byte encodings above, or returns null if the encoding ID is
     * not one of them or the conversion fails.
     * Speeds up the converting by setting the range for double
     * byte characters.
     */
    static char[] getConverter(short encodingID) {
        int dBegin = 0x8000;
        int dEnd   = 0xffff;
        String encoding;

        switch (encodingID) {
        case ShiftJISEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfcfc;
            encoding = "SJIS";
            break;
        case GBKEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfea0;
            encoding = "GBK";
            break;
        case Big5Encoding:
            dBegin = 0xa140;
            dEnd   = 0xfefe;
            encoding = "Big5";
            break;
        case WansungEncoding:
            dBegin = 0xa1a1;
            dEnd   = 0xfede;
            encoding = "EUC_KR";
            break;
        case JohabEncoding:
            dBegin = 0x8141;
            dEnd   = 0xfdfe;
            encoding = "Johab";
            break;
        default:
            return null;
        }

        try {
            /* other encoding -> unicode, for every 16 bit code */
            char[] convertedChars = new char[65536];
            Arrays.fill(convertedChars, noSuchChar);

            byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
            char[] outputChars = new char[(dEnd-dBegin+1)];

            /* Lay out every code in [dBegin, dEnd] as a big-endian byte
             * pair so the whole range is decoded in a single call.
             */
            int j = 0;
            int firstByte;
            if (encodingID == ShiftJISEncoding) {
                for (int i = dBegin; i <= dEnd; i++) {
                    firstByte = (i >> 8 & 0xff);
                    if (firstByte >= 0xa1 && firstByte <= 0xdf) {
                        //sjis halfwidth katakana
                        inputBytes[j++] = (byte)0xff;
                        inputBytes[j++] = (byte)0xff;
                    } else {
                        inputBytes[j++] = (byte)firstByte;
                        inputBytes[j++] = (byte)(i & 0xff);
                    }
                }
            } else {
                for (int i = dBegin; i <= dEnd; i++) {
                    inputBytes[j++] = (byte)(i>>8 & 0xff);
                    inputBytes[j++] = (byte)(i & 0xff);
                }
            }

            Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .replaceWith("\u0000")
            .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
                    CharBuffer.wrap(outputChars, 0, outputChars.length),
                    true);

            // ensure single byte ascii
            for (int i = 0x20; i <= 0x7e; i++) {
                convertedChars[i] = (char)i;
            }

            //sjis halfwidth katakana
            if (encodingID == ShiftJISEncoding) {
                for (int i = 0xa1; i <= 0xdf; i++) {
                    convertedChars[i] = (char)(i - 0xa1 + 0xff61);
                }
            }

            /* It would save heap space (approx 60Kbytes for each of these
             * converters) if stored only valid ranges (ie returned
             * outputChars directly. But this is tricky since want to
             * include the ASCII range too.
             */
            System.arraycopy(outputChars, 0, convertedChars, dBegin,
                             outputChars.length);

            /* invert this map as now want it to map from Unicode
             * to other encoding.
             */
            char[] invertedChars = new char[65536];
            for (int i = 0; i < 65536; i++) {
                if (convertedChars[i] != noSuchChar) {
                    invertedChars[convertedChars[i]] = (char)i;
                }
            }
            return invertedChars;

        } catch (Exception e) {
            /* Best effort: with no translation array the caller treats the
             * subtable as if it were unicode encoded.
             */
            e.printStackTrace();
        }
        return null;
    }

    /*
     * Returns the cached translation array for the given encoding ID,
     * creating it on first use. The returned array maps from unicode to
     * the other 2 byte encoding: eg for SJIS, indexing it with a unicode
     * char yields the corresponding SJIS code.
     * Returns null if the encoding ID is not one that has a converter.
     */
    static char[] getConverterMap(short encodingID) {
        if (encodingID < 0 || encodingID >= converterMaps.length) {
            return null;
        }
        if (converterMaps[encodingID] == null) {
           converterMaps[encodingID] = getConverter(encodingID);
        }
        return converterMaps[encodingID];
    }


    /*
     * Creates the CMap implementation matching the format of the subtable
     * found at "offset" in the cmap table buffer. "xlat" is the
     * Unicode->other encoding translation array, or null if the subtable
     * is unicode (or symbol) encoded.
     * Throws RuntimeException if the subtable format is not implemented.
     */
    static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
        /* First do a sanity check that this cmap subtable is contained
         * within the cmap table.
         */
        int subtableFormat = buffer.getChar(offset);
        long subtableLength;
        if (subtableFormat < 8) {
            subtableLength = buffer.getChar(offset+2);
        } else {
            /* 32 bit length: widen as unsigned, not sign extended */
            subtableLength = buffer.getInt(offset+4) & UINT_MASK;
        }
        if (offset+subtableLength > buffer.capacity()) {
            if (FontUtilities.isLogging()) {
                FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
            }
        }
        switch (subtableFormat) {
        case 0:  return new CMapFormat0(buffer, offset);
        case 2:  return new CMapFormat2(buffer, offset, xlat);
        case 4:  return new CMapFormat4(buffer, offset, xlat);
        case 6:  return new CMapFormat6(buffer, offset, xlat);
        case 8:  return new CMapFormat8(buffer, offset, xlat);
        case 10: return new CMapFormat10(buffer, offset, xlat);
        case 12: return new CMapFormat12(buffer, offset, xlat);
        default: throw new RuntimeException("Cmap format unimplemented: " +
                                            (int)buffer.getChar(offset));
        }
    }

    /* Maps a character code to a glyph code; 0 is the missing glyph. */
    abstract char getGlyph(int charCode);

    /* Format 4 Header is
     * ushort format (off=0)
     * ushort length (off=2)
     * ushort language (off=4)
     * ushort segCountX2 (off=6)
     * ushort searchRange (off=8)
     * ushort entrySelector (off=10)
     * ushort rangeShift (off=12)
     * ushort endCount[segCount] (off=14)
     * ushort reservedPad
     * ushort startCount[segCount]
     * short idDelta[segCount]
     * ushort idRangeOffset[segCount]
     * ushort glyphIdArray[]
     */
    static class CMapFormat4 extends CMap {
        int segCount;
        int entrySelector;
        int rangeShift;
        char[] endCount;
        char[] startCount;
        short[] idDelta;
        char[] idRangeOffset;   // stored in units of chars, not bytes
        char[] glyphIds;

        CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {

            this.xlat = xlat;

            bbuffer.position(offset);
            CharBuffer buffer = bbuffer.asCharBuffer();
            buffer.get();           // skip, we already know format=4
            int subtableLength = buffer.get();
            /* Try to recover from some bad fonts which specify a subtable
             * length that would overflow the byte buffer holding the whole
             * cmap table. If this isn't a recoverable situation an exception
             * may be thrown which is caught higher up the call stack.
             * Whilst this may seem lenient, in practice, unless the "bad"
             * subtable we are using is the last one in the cmap table we
             * would have no way of knowing about this problem anyway.
             */
            if (offset+subtableLength > bbuffer.capacity()) {
                subtableLength = bbuffer.capacity() - offset;
            }
            buffer.get();           // skip language
            segCount = buffer.get()/2;
            buffer.get();           // skip searchRange, we don't use it
            entrySelector = buffer.get();
            rangeShift    = buffer.get()/2;
            startCount = new char[segCount];
            endCount = new char[segCount];
            idDelta = new short[segCount];
            idRangeOffset = new char[segCount];

            buffer.get(endCount);
            buffer.get();           // 2 bytes for reserved pad
            buffer.get(startCount);

            for (int i = 0; i < segCount; i++) {
                idDelta[i] = (short)buffer.get();
            }

            /* the table holds byte offsets; halve them to get char offsets */
            for (int i = 0; i < segCount; i++) {
                char ctmp = buffer.get();
                idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
            }
            /* Can calculate the number of glyph IDs by subtracting
             * "pos" from the length of the cmap
             */
            int pos = (segCount*8+16)/2;
            buffer.position(pos);
            int numGlyphIds = (subtableLength/2 - pos);
            glyphIds = new char[numGlyphIds];
            buffer.get(glyphIds);
        }

        @Override
        char getGlyph(int charCode) {

            int index = 0;
            char glyphCode = 0;

            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            /* presence of translation array indicates that this
             * cmap is in some other (non-unicode encoding).
             * In order to look-up a char->glyph mapping we need to
             * translate the unicode code point to the encoding of
             * the cmap.
             * REMIND: VALID CHARCODES??
             */
            if (xlat != null) {
                charCode = xlat[charCode];
            }

            /*
             * Citation from the TrueType (and OpenType) spec:
             *   The segments are sorted in order of increasing endCode
             *   values, and the segment values are specified in four parallel
             *   arrays. You search for the first endCode that is greater than
             *   or equal to the character code you want to map. If the
             *   corresponding startCode is less than or equal to the
             *   character code, then you use the corresponding idDelta and
             *   idRangeOffset to map the character code to a glyph index
             *   (otherwise, the missingGlyph is returned).
             */

            /*
             * CMAP format4 defines several fields for optimized search of
             * the segment list (entrySelector, searchRange, rangeShift).
             * However, benefits are negligible and some fonts have incorrect
             * data - so we use straightforward binary search (see bug 6247425)
             */
            int left = 0, right = startCount.length;
            index = startCount.length >> 1;
            while (left < right) {
                if (endCount[index] < charCode) {
                    left = index + 1;
                } else {
                    right = index;
                }
                index = (left + right) >> 1;
            }

            /* No segment ends at or beyond this code. Well formed tables
             * finish with a 0xFFFF segment so this only happens for bad
             * (or empty) tables: report the missing glyph rather than
             * running off the end of the segment arrays.
             */
            if (index >= startCount.length) {
                return 0;
            }

            if (charCode >= startCount[index] && charCode <= endCount[index]) {
                int rangeOffset = idRangeOffset[index];

                if (rangeOffset == 0) {
                    glyphCode = (char)(charCode + idDelta[index]);
                } else {
                    /* Calculate an index into the glyphIds array */
                    int glyphIDIndex = rangeOffset - segCount + index
                                         + (charCode - startCount[index]);
                    /* A bad range offset must not escape the glyph array */
                    if (glyphIDIndex < 0 || glyphIDIndex >= glyphIds.length) {
                        return 0;
                    }
                    glyphCode = glyphIds[glyphIDIndex];
                    if (glyphCode != 0) {
                        glyphCode = (char)(glyphCode + idDelta[index]);
                    }
                }
            }
            return glyphCode;
        }
    }

    // Format 0: Byte Encoding table
    static class CMapFormat0 extends CMap {
        byte [] cmap;

        CMapFormat0(ByteBuffer buffer, int offset) {

            /* skip 6 bytes of format, length, and version */
            int len = buffer.getChar(offset+2);
            cmap = new byte[len-6];
            buffer.position(offset+6);
            buffer.get(cmap);
        }

        @Override
        char getGlyph(int charCode) {
            if (charCode < 256) {
                if (charCode < 0x0010) {
                    switch (charCode) {
                    case 0x0009:
                    case 0x000a:
                    case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
                    }
                }
                /* Negative codes, and codes beyond a table that is shorter
                 * than the usual 256 entries, have no glyph.
                 */
                if (charCode < 0 || charCode >= cmap.length) {
                    return 0;
                }
                return (char)(0xff & cmap[charCode]);
            } else {
                return 0;
            }
        }
    }

    // Format 2: High-byte mapping through table
    static class CMapFormat2 extends CMap {

        char[] subHeaderKey = new char[256];
         /* Store subheaders in individual arrays
          * A SubHeader entry theoretically looks like {
          *   char firstCode;
          *   char entryCount;
          *   short idDelta;
          *   char idRangeOffset;
          * }
          */
        char[] firstCodeArray;
        char[] entryCountArray;
        short[] idDeltaArray;
        char[] idRangeOffSetArray;

        char[] glyphIndexArray;

        CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {

            this.xlat = xlat;

            int tableLen = buffer.getChar(offset+2);
            buffer.position(offset+6);
            CharBuffer cBuffer = buffer.asCharBuffer();
            char maxSubHeader = 0;
            for (int i = 0; i < 256; i++) {
                subHeaderKey[i] = cBuffer.get();
                if (subHeaderKey[i] > maxSubHeader) {
                    maxSubHeader = subHeaderKey[i];
                }
            }
            /* The value of the subHeaderKey is 8 * the subHeader index,
             * so the number of subHeaders can be obtained by dividing
             * this value by 8 and adding 1.
             */
            int numSubHeaders = (maxSubHeader >> 3) +1;
            firstCodeArray = new char[numSubHeaders];
            entryCountArray = new char[numSubHeaders];
            idDeltaArray  = new short[numSubHeaders];
            idRangeOffSetArray  = new char[numSubHeaders];
            for (int i = 0; i < numSubHeaders; i++) {
                firstCodeArray[i] = cBuffer.get();
                entryCountArray[i] = cBuffer.get();
                idDeltaArray[i] = (short)cBuffer.get();
                idRangeOffSetArray[i] = cBuffer.get();
            }

            /* 518 = 6 byte header + 256 two byte subHeaderKeys */
            int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
            glyphIndexArray = new char[glyphIndexArrSize];
            cBuffer.get(glyphIndexArray);
        }

        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            if (xlat != null) {
                charCode = xlat[charCode];
            }

            char highByte = (char)(charCode >> 8);
            char lowByte = (char)(charCode & 0xff);
            int key = subHeaderKey[highByte]>>3; // index into subHeaders
            char mapMe;

            if (key != 0) {
                mapMe = lowByte;
            } else {
                mapMe = highByte;
                if (mapMe == 0) {
                    mapMe = lowByte;
                }
            }

            char firstCode = firstCodeArray[key];
            if (mapMe < firstCode) {
                return 0;
            } else {
                mapMe -= firstCode;
            }

            if (mapMe < entryCountArray[key]) {
                /* "address" arithmetic is needed to calculate the offset
                 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
                 * the number of bytes from that location in the table where
                 * the subarray of glyphIndexes starting at "firstCode" begins.
                 * Each entry in the subHeader table is 8 bytes, and the
                 * idRangeOffSetArray field is at offset 6 in the entry.
                 * The glyphIndexArray immediately follows the subHeaders.
                 * So if there are "N" entries then the number of bytes to the
                 * start of glyphIndexArray is (N-key)*8-6.
                 * Subtract this from the idRangeOffSetArray value to get
                 * the number of bytes into glyphIndexArray and divide by 2 to
                 * get the (char) array index.
                 */
                int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
                int glyphSubArrayStart =
                        (idRangeOffSetArray[key] - glyphArrayOffset)/2;
                char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
                if (glyphCode != 0) {
                    glyphCode += idDeltaArray[key]; //idDelta
                    return glyphCode;
                }
            }
            return 0;
        }
    }

    // Format 6: Trimmed table mapping
    static class CMapFormat6 extends CMap {

        char firstCode;
        char entryCount;
        char[] glyphIdArray;

        CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* Retain the translation array: getGlyph() consults it, so
              * without this a non-unicode format 6 subtable would be
              * looked up with untranslated unicode values.
              */
             this.xlat = xlat;

             bbuffer.position(offset+6);
             CharBuffer buffer = bbuffer.asCharBuffer();
             firstCode = buffer.get();
             entryCount = buffer.get();
             glyphIdArray = new char[entryCount];
             buffer.get(glyphIdArray);
        }

        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            if (xlat != null) {
                charCode = xlat[charCode];
            }

            charCode -= firstCode;
            if (charCode < 0 || charCode >= entryCount) {
                return 0;
            } else {
                return glyphIdArray[charCode];
            }
        }
    }

    // Format 8: mixed 16-bit and 32-bit coverage
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    static class CMapFormat8 extends CMap {
         byte[] is32 = new byte[8192];
         int nGroups;
         int[] startCharCode;
         int[] endCharCode;
         int[] startGlyphID;

         /* Only the header is read; the groups themselves are not loaded
          * and getGlyph() reports every code as missing.
          * The xlat argument is not retained.
          */
         CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* is32 follows the 12 byte header (format, reserved, length,
              * language) of *this subtable*, so it is relative to offset.
              */
             bbuffer.position(offset+12);
             bbuffer.get(is32);
             nGroups = bbuffer.getInt();
             startCharCode = new int[nGroups];
             endCharCode   = new int[nGroups];
             startGlyphID  = new int[nGroups];
         }

        @Override
        char getGlyph(int charCode) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=8");
            }
            return 0;
        }

    }


    // Format 4-byte 10: Trimmed table mapping
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    static class CMapFormat10 extends CMap {

         long firstCode;
         int entryCount;
         char[] glyphIdArray;

         /* Format 10 Header is
          * ushort format (off=0)
          * ushort reserved (off=2)
          * ulong length (off=4)
          * ulong language (off=8)
          * ulong startCharCode (off=12)
          * ulong numChars (off=16)
          * ushort glyphs[] (off=20)
          * The xlat argument is not retained.
          */
         CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* Read the header fields at their position within this
              * subtable instead of wherever the buffer happens to be.
              */
             firstCode = bbuffer.getInt(offset+12) & UINT_MASK;
             entryCount = bbuffer.getInt(offset+16);
             bbuffer.position(offset+20);
             CharBuffer buffer = bbuffer.asCharBuffer();
             glyphIdArray = new char[entryCount];
             buffer.get(glyphIdArray);
         }

        @Override
        char getGlyph(int charCode) {

            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=10");
            }

            int code = (int)(charCode - firstCode);
            if (code < 0 || code >= entryCount) {
                return 0;
            } else {
                return glyphIdArray[code];
            }
        }
    }

    // Format 12: Segmented coverage for UCS-4 (fonts supporting
    // surrogate pairs)
    static class CMapFormat12 extends CMap {

        int numGroups;
        int highBit =0;
        int power;      // largest power of two that is <= numGroups
        int extra;      // numGroups - power
        long[] startCharCode;
        long[] endCharCode;
        int[] startGlyphID;

        CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=12");
            }

            numGroups = buffer.getInt(offset+12);
            /* With no groups "extra" would be -1 and every lookup would
             * index outside the arrays, so reject the subtable up front.
             */
            if (numGroups <= 0) {
                throw new RuntimeException("Invalid cmap subtable");
            }
            startCharCode = new long[numGroups];
            endCharCode = new long[numGroups];
            startGlyphID = new int[numGroups];
            buffer.position(offset+16);
            buffer = buffer.slice();
            IntBuffer ibuffer = buffer.asIntBuffer();
            for (int i = 0; i < numGroups; i++) {
                /* character codes are unsigned 32 bit values */
                startCharCode[i] = ibuffer.get() & UINT_MASK;
                endCharCode[i] = ibuffer.get() & UINT_MASK;
                startGlyphID[i] = ibuffer.get();
            }

            /* Finds the high bit by binary searching through the bits */
            int value = numGroups;

            if (value >= 1 << 16) {
                value >>= 16;
                highBit += 16;
            }

            if (value >= 1 << 8) {
                value >>= 8;
                highBit += 8;
            }

            if (value >= 1 << 4) {
                value >>= 4;
                highBit += 4;
            }

            if (value >= 1 << 2) {
                value >>= 2;
                highBit += 2;
            }

            if (value >= 1 << 1) {
                value >>= 1;
                highBit += 1;
            }

            power = 1 << highBit;
            extra = numGroups - power;
        }

        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, false);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }
            int probe = power;
            int range = 0;

            if (startCharCode[extra] <= charCode) {
                range = extra;
            }

            while (probe > 1) {
                probe >>= 1;

                if (startCharCode[range+probe] <= charCode) {
                    range += probe;
                }
            }

            if (startCharCode[range] <= charCode &&
                  endCharCode[range] >= charCode) {
                return (char)
                    (startGlyphID[range] + (charCode - startCharCode[range]));
            }

            return 0;
        }

    }

    /* Used to substitute for bad Cmaps. */
    static class NullCMapClass extends CMap {

        @Override
        char getGlyph(int charCode) {
            return 0;
        }
    }

    public static final NullCMapClass theNullCmap = new NullCMapClass();

    /*
     * Handles the codes that are never looked up in the subtable.
     * Returns INVISIBLE_GLYPH_ID for tab, LF, CR and for the ranges
     * 0x200c-0x200f, 0x2028-0x202e and 0x206a-0x206f; returns 0 (missing
     * glyph) for codes >= 0xFFFF if "noSurrogates" is set; otherwise
     * returns -1 meaning the caller should consult its subtable.
     */
    final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
        if (charCode < 0x0010) {
            switch (charCode) {
            case 0x0009:
            case 0x000a:
            case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            }
        } else if (charCode >= 0x200c) {
            if ((charCode <= 0x200f) ||
                (charCode >= 0x2028 && charCode <= 0x202e) ||
                (charCode >= 0x206a && charCode <= 0x206f)) {
                return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            } else if (noSurrogates && charCode >= 0xFFFF) {
                return 0;
            }
        }
        return -1;
    }
}