1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.font; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.IntBuffer; 31 import java.util.Locale; 32 import java.nio.charset.*; 33 34 /* 35 * A tt font has a CMAP table which is in turn made up of sub-tables which 36 * describe the char to glyph mapping in (possibly) multiple ways. 37 * CMAP subtables are described by 3 values. 38 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK) 39 * 2. Encoding (eg 0=symbol, 1=unicode) 40 * 3. TrueType subtable format (how the char->glyph mapping for the encoding 41 * is stored in the subtable). See the TrueType spec. Format 4 is required 42 * by MS in fonts for windows. Its uses segmented mapping to delta values. 43 * Most typically we see are (3,1,4) : 44 * CMAP Platform ID=3 is what we use. 45 * Encodings that are used in practice by JDK on Solaris are 46 * symbol (3,0) 47 * unicode (3,1) 48 * GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5) 49 * The format for almost all subtables is 4. However the solaris (3,5) 50 * encodings are typically in format 2. 51 */ 52 abstract class CMap { 53 54 // static char WingDings_b2c[] = { 55 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 56 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 57 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 58 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 59 // 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 60 // 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 61 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 62 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d, 63 // 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 64 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 65 // 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd, 66 // 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 67 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 68 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 69 // 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd, 70 // 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd, 71 // 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 72 // 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d, 73 // 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd, 74 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 75 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd, 76 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735, 77 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd, 78 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 79 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd, 80 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 81 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 82 // 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd, 83 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 84 // 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 85 // 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 86 // 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd, 87 // }; 88 89 // static char Symbols_b2c[] = { 90 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 91 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 92 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 93 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 94 // 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d, 95 // 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd, 96 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 97 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 98 // 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393, 99 // 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, 100 // 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9, 101 // 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd, 102 // 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3, 103 // 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf, 104 // 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9, 105 // 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd, 106 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 107 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 108 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 109 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 110 // 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd, 111 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 112 // 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219, 113 // 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd, 114 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229, 115 // 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, 116 // 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5, 117 // 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 118 // 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd, 119 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 120 // 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 121 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 122 // }; 123 124 static final short ShiftJISEncoding = 2; 125 static final short GBKEncoding = 3; 126 static final short Big5Encoding = 4; 127 static final short WansungEncoding = 5; 128 static final short JohabEncoding = 6; 129 static final short MSUnicodeSurrogateEncoding = 10; 130 131 static final char noSuchChar = (char)0xfffd; 132 static final int SHORTMASK = 0x0000ffff; 133 static final int INTMASK = 0x7fffffff; 134 135 static final char[][] converterMaps = new char[7][]; 136 137 /* 138 * Unicode->other encoding translation array. A pre-computed look up 139 * which can be shared across all fonts using that encoding. 140 * Using this saves running character coverters repeatedly. 141 */ 142 char[] xlat; 143 UVS uvs = null; 144 145 static CMap initialize(TrueTypeFont font) { 146 147 CMap cmap = null; 148 149 int offset, platformID, encodingID=-1; 150 151 int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0, 152 three6=0, three10=0; 153 int zero5 = 0; // for Unicode Variation Sequences 154 boolean threeStar = false; 155 156 ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag); 157 int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag); 158 short numberSubTables = cmapBuffer.getShort(2); 159 160 /* locate the offsets of all 3,* (ie Microsoft platform) encodings */ 161 for (int i=0; i<numberSubTables; i++) { 162 cmapBuffer.position(i * 8 + 4); 163 platformID = cmapBuffer.getShort(); 164 if (platformID == 3) { 165 threeStar = true; 166 encodingID = cmapBuffer.getShort(); 167 offset = cmapBuffer.getInt(); 168 switch (encodingID) { 169 case 0: three0 = offset; break; // MS Symbol encoding 170 case 1: three1 = offset; break; // MS Unicode cmap 171 case 2: three2 = offset; break; // ShiftJIS cmap. 172 case 3: three3 = offset; break; // GBK cmap 173 case 4: three4 = offset; break; // Big 5 cmap 174 case 5: three5 = offset; break; // Wansung 175 case 6: three6 = offset; break; // Johab 176 case 10: three10 = offset; break; // MS Unicode surrogates 177 } 178 } else if (platformID == 0) { 179 encodingID = cmapBuffer.getShort(); 180 offset = cmapBuffer.getInt(); 181 if (encodingID == 5) { 182 zero5 = offset; 183 } 184 } 185 } 186 187 /* This defines the preference order for cmap subtables */ 188 if (threeStar) { 189 if (three10 != 0) { 190 cmap = createCMap(cmapBuffer, three10, null); 191 } 192 else if (three0 != 0) { 193 /* The special case treatment of these fonts leads to 194 * anomalies where a user can view "wingdings" and "wingdings2" 195 * and the latter shows all its code points in the unicode 196 * private use area at 0xF000->0XF0FF and the former shows 197 * a scattered subset of its glyphs that are known mappings to 198 * unicode code points. 199 * The primary purpose of these mappings was to facilitate 200 * display of symbol chars etc in composite fonts, however 201 * this is not needed as all these code points are covered 202 * by some other platform symbol font. 203 * Commenting this out reduces the role of these two files 204 * (assuming that they continue to be used in font.properties) 205 * to just one of contributing to the overall composite 206 * font metrics, and also AWT can still access the fonts. 207 * Clients which explicitly accessed these fonts as names 208 * "Symbol" and "Wingdings" (ie as physical fonts) and 209 * expected to see a scattering of these characters will 210 * see them now as missing. How much of a problem is this? 211 * Perhaps we could still support this mapping just for 212 * "Symbol.ttf" but I suspect some users would prefer it 213 * to be mapped in to the Latin range as that is how 214 * the "symbol" font is used in native apps. 215 */ 216 // String name = font.platName.toLowerCase(Locale.ENGLISH); 217 // if (name.endsWith("symbol.ttf")) { 218 // cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c); 219 // } else if (name.endsWith("wingding.ttf")) { 220 // cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c); 221 // } else { 222 cmap = createCMap(cmapBuffer, three0, null); 223 // } 224 } 225 else if (three1 != 0) { 226 cmap = createCMap(cmapBuffer, three1, null); 227 } 228 else if (three2 != 0) { 229 cmap = createCMap(cmapBuffer, three2, 230 getConverterMap(ShiftJISEncoding)); 231 } 232 else if (three3 != 0) { 233 cmap = createCMap(cmapBuffer, three3, 234 getConverterMap(GBKEncoding)); 235 } 236 else if (three4 != 0) { 237 cmap = createCMap(cmapBuffer, three4, 238 getConverterMap(Big5Encoding)); 239 } 240 else if (three5 != 0) { 241 cmap = createCMap(cmapBuffer, three5, 242 getConverterMap(WansungEncoding)); 243 } 244 else if (three6 != 0) { 245 cmap = createCMap(cmapBuffer, three6, 246 getConverterMap(JohabEncoding)); 247 } 248 } else { 249 /* No 3,* subtable was found. Just use whatever is the first 250 * table listed. Not very useful but maybe better than 251 * rejecting the font entirely? 252 */ 253 cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null); 254 } 255 // For Unicode Variation Sequences 256 if (cmap != null && zero5 != 0) { 257 cmap.createUVS(cmapBuffer, zero5); 258 } 259 return cmap; 260 } 261 262 /* speed up the converting by setting the range for double 263 * byte characters; 264 */ 265 static char[] getConverter(short encodingID) { 266 int dBegin = 0x8000; 267 int dEnd = 0xffff; 268 String encoding; 269 270 switch (encodingID) { 271 case ShiftJISEncoding: 272 dBegin = 0x8140; 273 dEnd = 0xfcfc; 274 encoding = "SJIS"; 275 break; 276 case GBKEncoding: 277 dBegin = 0x8140; 278 dEnd = 0xfea0; 279 encoding = "GBK"; 280 break; 281 case Big5Encoding: 282 dBegin = 0xa140; 283 dEnd = 0xfefe; 284 encoding = "Big5"; 285 break; 286 case WansungEncoding: 287 dBegin = 0xa1a1; 288 dEnd = 0xfede; 289 encoding = "EUC_KR"; 290 break; 291 case JohabEncoding: 292 dBegin = 0x8141; 293 dEnd = 0xfdfe; 294 encoding = "Johab"; 295 break; 296 default: 297 return null; 298 } 299 300 try { 301 char[] convertedChars = new char[65536]; 302 for (int i=0; i<65536; i++) { 303 convertedChars[i] = noSuchChar; 304 } 305 306 byte[] inputBytes = new byte[(dEnd-dBegin+1)*2]; 307 char[] outputChars = new char[(dEnd-dBegin+1)]; 308 309 int j = 0; 310 int firstByte; 311 if (encodingID == ShiftJISEncoding) { 312 for (int i = dBegin; i <= dEnd; i++) { 313 firstByte = (i >> 8 & 0xff); 314 if (firstByte >= 0xa1 && firstByte <= 0xdf) { 315 //sjis halfwidth katakana 316 inputBytes[j++] = (byte)0xff; 317 inputBytes[j++] = (byte)0xff; 318 } else { 319 inputBytes[j++] = (byte)firstByte; 320 inputBytes[j++] = (byte)(i & 0xff); 321 } 322 } 323 } else { 324 for (int i = dBegin; i <= dEnd; i++) { 325 inputBytes[j++] = (byte)(i>>8 & 0xff); 326 inputBytes[j++] = (byte)(i & 0xff); 327 } 328 } 329 330 Charset.forName(encoding).newDecoder() 331 .onMalformedInput(CodingErrorAction.REPLACE) 332 .onUnmappableCharacter(CodingErrorAction.REPLACE) 333 .replaceWith("\u0000") 334 .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length), 335 CharBuffer.wrap(outputChars, 0, outputChars.length), 336 true); 337 338 // ensure single byte ascii 339 for (int i = 0x20; i <= 0x7e; i++) { 340 convertedChars[i] = (char)i; 341 } 342 343 //sjis halfwidth katakana 344 if (encodingID == ShiftJISEncoding) { 345 for (int i = 0xa1; i <= 0xdf; i++) { 346 convertedChars[i] = (char)(i - 0xa1 + 0xff61); 347 } 348 } 349 350 /* It would save heap space (approx 60Kbytes for each of these 351 * converters) if stored only valid ranges (ie returned 352 * outputChars directly. But this is tricky since want to 353 * include the ASCII range too. 354 */ 355 // System.err.println("oc.len="+outputChars.length); 356 // System.err.println("cc.len="+convertedChars.length); 357 // System.err.println("dbegin="+dBegin); 358 System.arraycopy(outputChars, 0, convertedChars, dBegin, 359 outputChars.length); 360 361 //return convertedChars; 362 /* invert this map as now want it to map from Unicode 363 * to other encoding. 364 */ 365 char [] invertedChars = new char[65536]; 366 for (int i=0;i<65536;i++) { 367 if (convertedChars[i] != noSuchChar) { 368 invertedChars[convertedChars[i]] = (char)i; 369 } 370 } 371 return invertedChars; 372 373 } catch (Exception e) { 374 e.printStackTrace(); 375 } 376 return null; 377 } 378 379 /* 380 * The returned array maps to unicode from some other 2 byte encoding 381 * eg for a 2byte index which represents a SJIS char, the indexed 382 * value is the corresponding unicode char. 383 */ 384 static char[] getConverterMap(short encodingID) { 385 if (converterMaps[encodingID] == null) { 386 converterMaps[encodingID] = getConverter(encodingID); 387 } 388 return converterMaps[encodingID]; 389 } 390 391 392 static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) { 393 /* First do a sanity check that this cmap subtable is contained 394 * within the cmap table. 395 */ 396 int subtableFormat = buffer.getChar(offset); 397 long subtableLength; 398 if (subtableFormat < 8) { 399 subtableLength = buffer.getChar(offset+2); 400 } else { 401 subtableLength = buffer.getInt(offset+4) & INTMASK; 402 } 403 if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) { 404 FontUtilities.logWarning("Cmap subtable overflows buffer."); 405 } 406 switch (subtableFormat) { 407 case 0: return new CMapFormat0(buffer, offset); 408 case 2: return new CMapFormat2(buffer, offset, xlat); 409 case 4: return new CMapFormat4(buffer, offset, xlat); 410 case 6: return new CMapFormat6(buffer, offset, xlat); 411 case 8: return new CMapFormat8(buffer, offset, xlat); 412 case 10: return new CMapFormat10(buffer, offset, xlat); 413 case 12: return new CMapFormat12(buffer, offset, xlat); 414 default: throw new RuntimeException("Cmap format unimplemented: " + 415 (int)buffer.getChar(offset)); 416 } 417 } 418 419 private void createUVS(ByteBuffer buffer, int offset) { 420 int subtableFormat = buffer.getChar(offset); 421 if (subtableFormat == 14) { 422 long subtableLength = buffer.getInt(offset + 2) & INTMASK; 423 if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) { 424 FontUtilities.logWarning("Cmap UVS subtable overflows buffer."); 425 } 426 try { 427 this.uvs = new UVS(buffer, offset); 428 } catch (Throwable t) { 429 t.printStackTrace(); 430 } 431 } 432 return; 433 } 434 435 /* 436 final char charVal(byte[] cmap, int index) { 437 return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); 438 } 439 440 final short shortVal(byte[] cmap, int index) { 441 return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); 442 } 443 */ 444 abstract char getGlyph(int charCode); 445 446 /* Format 4 Header is 447 * ushort format (off=0) 448 * ushort length (off=2) 449 * ushort language (off=4) 450 * ushort segCountX2 (off=6) 451 * ushort searchRange (off=8) 452 * ushort entrySelector (off=10) 453 * ushort rangeShift (off=12) 454 * ushort endCount[segCount] (off=14) 455 * ushort reservedPad 456 * ushort startCount[segCount] 457 * short idDelta[segCount] 458 * idRangeOFfset[segCount] 459 * ushort glyphIdArray[] 460 */ 461 static class CMapFormat4 extends CMap { 462 int segCount; 463 int entrySelector; 464 int rangeShift; 465 char[] endCount; 466 char[] startCount; 467 short[] idDelta; 468 char[] idRangeOffset; 469 char[] glyphIds; 470 471 CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) { 472 473 this.xlat = xlat; 474 475 bbuffer.position(offset); 476 CharBuffer buffer = bbuffer.asCharBuffer(); 477 buffer.get(); // skip, we already know format=4 478 int subtableLength = buffer.get(); 479 /* Try to recover from some bad fonts which specify a subtable 480 * length that would overflow the byte buffer holding the whole 481 * cmap table. If this isn't a recoverable situation an exception 482 * may be thrown which is caught higher up the call stack. 483 * Whilst this may seem lenient, in practice, unless the "bad" 484 * subtable we are using is the last one in the cmap table we 485 * would have no way of knowing about this problem anyway. 486 */ 487 if (offset+subtableLength > bbuffer.capacity()) { 488 subtableLength = bbuffer.capacity() - offset; 489 } 490 buffer.get(); // skip language 491 segCount = buffer.get()/2; 492 int searchRange = buffer.get(); 493 entrySelector = buffer.get(); 494 rangeShift = buffer.get()/2; 495 startCount = new char[segCount]; 496 endCount = new char[segCount]; 497 idDelta = new short[segCount]; 498 idRangeOffset = new char[segCount]; 499 500 for (int i=0; i<segCount; i++) { 501 endCount[i] = buffer.get(); 502 } 503 buffer.get(); // 2 bytes for reserved pad 504 for (int i=0; i<segCount; i++) { 505 startCount[i] = buffer.get(); 506 } 507 508 for (int i=0; i<segCount; i++) { 509 idDelta[i] = (short)buffer.get(); 510 } 511 512 for (int i=0; i<segCount; i++) { 513 char ctmp = buffer.get(); 514 idRangeOffset[i] = (char)((ctmp>>1)&0xffff); 515 } 516 /* Can calculate the number of glyph IDs by subtracting 517 * "pos" from the length of the cmap 518 */ 519 int pos = (segCount*8+16)/2; 520 buffer.position(pos); 521 int numGlyphIds = (subtableLength/2 - pos); 522 glyphIds = new char[numGlyphIds]; 523 for (int i=0;i<numGlyphIds;i++) { 524 glyphIds[i] = buffer.get(); 525 } 526 /* 527 System.err.println("segcount="+segCount); 528 System.err.println("entrySelector="+entrySelector); 529 System.err.println("rangeShift="+rangeShift); 530 for (int j=0;j<segCount;j++) { 531 System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+ 532 " ec="+(int)(endCount[j]&0xffff)+ 533 " delta="+idDelta[j] + 534 " ro="+(int)idRangeOffset[j]); 535 } 536 537 //System.err.println("numglyphs="+glyphIds.length); 538 for (int i=0;i<numGlyphIds;i++) { 539 System.err.println("gid["+i+"]="+(int)glyphIds[i]); 540 } 541 */ 542 } 543 544 char getGlyph(int charCode) { 545 546 final int origCharCode = charCode; 547 int index = 0; 548 char glyphCode = 0; 549 550 int controlGlyph = getControlCodeGlyph(charCode, true); 551 if (controlGlyph >= 0) { 552 return (char)controlGlyph; 553 } 554 555 /* presence of translation array indicates that this 556 * cmap is in some other (non-unicode encoding). 557 * In order to look-up a char->glyph mapping we need to 558 * translate the unicode code point to the encoding of 559 * the cmap. 560 * REMIND: VALID CHARCODES?? 561 */ 562 if (xlat != null) { 563 charCode = xlat[charCode]; 564 } 565 566 /* 567 * Citation from the TrueType (and OpenType) spec: 568 * The segments are sorted in order of increasing endCode 569 * values, and the segment values are specified in four parallel 570 * arrays. You search for the first endCode that is greater than 571 * or equal to the character code you want to map. If the 572 * corresponding startCode is less than or equal to the 573 * character code, then you use the corresponding idDelta and 574 * idRangeOffset to map the character code to a glyph index 575 * (otherwise, the missingGlyph is returned). 576 */ 577 578 /* 579 * CMAP format4 defines several fields for optimized search of 580 * the segment list (entrySelector, searchRange, rangeShift). 581 * However, benefits are neglible and some fonts have incorrect 582 * data - so we use straightforward binary search (see bug 6247425) 583 */ 584 int left = 0, right = startCount.length; 585 index = startCount.length >> 1; 586 while (left < right) { 587 if (endCount[index] < charCode) { 588 left = index + 1; 589 } else { 590 right = index; 591 } 592 index = (left + right) >> 1; 593 } 594 595 if (charCode >= startCount[index] && charCode <= endCount[index]) { 596 int rangeOffset = idRangeOffset[index]; 597 598 if (rangeOffset == 0) { 599 glyphCode = (char)(charCode + idDelta[index]); 600 } else { 601 /* Calculate an index into the glyphIds array */ 602 603 /* 604 System.err.println("rangeoffset="+rangeOffset+ 605 " charCode=" + charCode + 606 " scnt["+index+"]="+(int)startCount[index] + 607 " segCnt="+segCount); 608 */ 609 610 int glyphIDIndex = rangeOffset - segCount + index 611 + (charCode - startCount[index]); 612 glyphCode = glyphIds[glyphIDIndex]; 613 if (glyphCode != 0) { 614 glyphCode = (char)(glyphCode + idDelta[index]); 615 } 616 } 617 } 618 if (glyphCode == 0) { 619 glyphCode = getFormatCharGlyph(origCharCode); 620 } 621 return glyphCode; 622 } 623 } 624 625 // Format 0: Byte Encoding table 626 static class CMapFormat0 extends CMap { 627 byte [] cmap; 628 629 CMapFormat0(ByteBuffer buffer, int offset) { 630 631 /* skip 6 bytes of format, length, and version */ 632 int len = buffer.getChar(offset+2); 633 cmap = new byte[len-6]; 634 buffer.position(offset+6); 635 buffer.get(cmap); 636 } 637 638 char getGlyph(int charCode) { 639 if (charCode < 256) { 640 if (charCode < 0x0010) { 641 switch (charCode) { 642 case 0x0009: 643 case 0x000a: 644 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; 645 } 646 } 647 return (char)(0xff & cmap[charCode]); 648 } else { 649 return 0; 650 } 651 } 652 } 653 654 // static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) { 655 656 // CMap cmap = createCMap(buffer, offset, null); 657 // if (cmap == null) { 658 // return null; 659 // } else { 660 // return new CMapFormatSymbol(cmap, syms); 661 // } 662 // } 663 664 // static class CMapFormatSymbol extends CMap { 665 666 // CMap cmap; 667 // static final int NUM_BUCKETS = 128; 668 // Bucket[] buckets = new Bucket[NUM_BUCKETS]; 669 670 // class Bucket { 671 // char unicode; 672 // char glyph; 673 // Bucket next; 674 675 // Bucket(char u, char g) { 676 // unicode = u; 677 // glyph = g; 678 // } 679 // } 680 681 // CMapFormatSymbol(CMap cmap, char[] syms) { 682 683 // this.cmap = cmap; 684 685 // for (int i=0;i<syms.length;i++) { 686 // char unicode = syms[i]; 687 // if (unicode != noSuchChar) { 688 // char glyph = cmap.getGlyph(i + 0xf000); 689 // int hash = unicode % NUM_BUCKETS; 690 // Bucket bucket = new Bucket(unicode, glyph); 691 // if (buckets[hash] == null) { 692 // buckets[hash] = bucket; 693 // } else { 694 // Bucket b = buckets[hash]; 695 // while (b.next != null) { 696 // b = b.next; 697 // } 698 // b.next = bucket; 699 // } 700 // } 701 // } 702 // } 703 704 // char getGlyph(int unicode) { 705 // if (unicode >= 0x1000) { 706 // return 0; 707 // } 708 // else if (unicode >=0xf000 && unicode < 0xf100) { 709 // return cmap.getGlyph(unicode); 710 // } else { 711 // Bucket b = buckets[unicode % NUM_BUCKETS]; 712 // while (b != null) { 713 // if (b.unicode == unicode) { 714 // return b.glyph; 715 // } else { 716 // b = b.next; 717 // } 718 // } 719 // return 0; 720 // } 721 // } 722 // } 723 724 // Format 2: High-byte mapping through table 725 static class CMapFormat2 extends CMap { 726 727 char[] subHeaderKey = new char[256]; 728 /* Store subheaders in individual arrays 729 * A SubHeader entry theortically looks like { 730 * char firstCode; 731 * char entryCount; 732 * short idDelta; 733 * char idRangeOffset; 734 * } 735 */ 736 char[] firstCodeArray; 737 char[] entryCountArray; 738 short[] idDeltaArray; 739 char[] idRangeOffSetArray; 740 741 char[] glyphIndexArray; 742 743 CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) { 744 745 this.xlat = xlat; 746 747 int tableLen = buffer.getChar(offset+2); 748 buffer.position(offset+6); 749 CharBuffer cBuffer = buffer.asCharBuffer(); 750 char maxSubHeader = 0; 751 for (int i=0;i<256;i++) { 752 subHeaderKey[i] = cBuffer.get(); 753 if (subHeaderKey[i] > maxSubHeader) { 754 maxSubHeader = subHeaderKey[i]; 755 } 756 } 757 /* The value of the subHeaderKey is 8 * the subHeader index, 758 * so the number of subHeaders can be obtained by dividing 759 * this value bv 8 and adding 1. 760 */ 761 int numSubHeaders = (maxSubHeader >> 3) +1; 762 firstCodeArray = new char[numSubHeaders]; 763 entryCountArray = new char[numSubHeaders]; 764 idDeltaArray = new short[numSubHeaders]; 765 idRangeOffSetArray = new char[numSubHeaders]; 766 for (int i=0; i<numSubHeaders; i++) { 767 firstCodeArray[i] = cBuffer.get(); 768 entryCountArray[i] = cBuffer.get(); 769 idDeltaArray[i] = (short)cBuffer.get(); 770 idRangeOffSetArray[i] = cBuffer.get(); 771 // System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+ 772 // " ec="+(int)entryCountArray[i]+ 773 // " delta="+(int)idDeltaArray[i]+ 774 // " offset="+(int)idRangeOffSetArray[i]); 775 } 776 777 int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2; 778 glyphIndexArray = new char[glyphIndexArrSize]; 779 for (int i=0; i<glyphIndexArrSize;i++) { 780 glyphIndexArray[i] = cBuffer.get(); 781 } 782 } 783 784 char getGlyph(int charCode) { 785 final int origCharCode = charCode; 786 int controlGlyph = getControlCodeGlyph(charCode, true); 787 if (controlGlyph >= 0) { 788 return (char)controlGlyph; 789 } 790 791 if (xlat != null) { 792 charCode = xlat[charCode]; 793 } 794 795 char highByte = (char)(charCode >> 8); 796 char lowByte = (char)(charCode & 0xff); 797 int key = subHeaderKey[highByte]>>3; // index into subHeaders 798 char mapMe; 799 800 if (key != 0) { 801 mapMe = lowByte; 802 } else { 803 mapMe = highByte; 804 if (mapMe == 0) { 805 mapMe = lowByte; 806 } 807 } 808 809 // System.err.println("charCode="+Integer.toHexString(charCode)+ 810 // " key="+key+ " mapMe="+Integer.toHexString(mapMe)); 811 char firstCode = firstCodeArray[key]; 812 if (mapMe < firstCode) { 813 return 0; 814 } else { 815 mapMe -= firstCode; 816 } 817 818 if (mapMe < entryCountArray[key]) { 819 /* "address" arithmetic is needed to calculate the offset 820 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies 821 * the number of bytes from that location in the table where 822 * the subarray of glyphIndexes starting at "firstCode" begins. 823 * Each entry in the subHeader table is 8 bytes, and the 824 * idRangeOffSetArray field is at offset 6 in the entry. 825 * The glyphIndexArray immediately follows the subHeaders. 826 * So if there are "N" entries then the number of bytes to the 827 * start of glyphIndexArray is (N-key)*8-6. 828 * Subtract this from the idRangeOffSetArray value to get 829 * the number of bytes into glyphIndexArray and divide by 2 to 830 * get the (char) array index. 831 */ 832 int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6; 833 int glyphSubArrayStart = 834 (idRangeOffSetArray[key] - glyphArrayOffset)/2; 835 char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe]; 836 if (glyphCode != 0) { 837 glyphCode += idDeltaArray[key]; //idDelta 838 return glyphCode; 839 } 840 } 841 return getFormatCharGlyph(origCharCode); 842 } 843 } 844 845 // Format 6: Trimmed table mapping 846 static class CMapFormat6 extends CMap { 847 848 char firstCode; 849 char entryCount; 850 char[] glyphIdArray; 851 852 CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) { 853 854 bbuffer.position(offset+6); 855 CharBuffer buffer = bbuffer.asCharBuffer(); 856 firstCode = buffer.get(); 857 entryCount = buffer.get(); 858 glyphIdArray = new char[entryCount]; 859 for (int i=0; i< entryCount; i++) { 860 glyphIdArray[i] = buffer.get(); 861 } 862 } 863 864 char getGlyph(int charCode) { 865 final int origCharCode = charCode; 866 int controlGlyph = getControlCodeGlyph(charCode, true); 867 if (controlGlyph >= 0) { 868 return (char)controlGlyph; 869 } 870 871 if (xlat != null) { 872 charCode = xlat[charCode]; 873 } 874 875 charCode -= firstCode; 876 if (charCode < 0 || charCode >= entryCount) { 877 return getFormatCharGlyph(origCharCode); 878 } else { 879 return glyphIdArray[charCode]; 880 } 881 } 882 } 883 884 // Format 8: mixed 16-bit and 32-bit coverage 885 // Seems unlikely this code will ever get tested as we look for 886 // MS platform Cmaps and MS states (in the Opentype spec on their website) 887 // that MS doesn't support this format 888 static class CMapFormat8 extends CMap { 889 byte[] is32 = new byte[8192]; 890 int nGroups; 891 int[] startCharCode; 892 int[] endCharCode; 893 int[] startGlyphID; 894 895 CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) { 896 897 bbuffer.position(12); 898 bbuffer.get(is32); 899 nGroups = bbuffer.getInt() & INTMASK; 900 // A map group record is three uint32's making for 12 bytes total 901 if (bbuffer.remaining() < (12 * (long)nGroups)) { 902 throw new RuntimeException("Format 8 table exceeded"); 903 } 904 startCharCode = new int[nGroups]; 905 endCharCode = new int[nGroups]; 906 startGlyphID = new int[nGroups]; 907 } 908 909 char getGlyph(int charCode) { 910 if (xlat != null) { 911 throw new RuntimeException("xlat array for cmap fmt=8"); 912 } 913 return 0; 914 } 915 916 } 917 918 919 // Format 4-byte 10: Trimmed table mapping 920 // Seems unlikely this code will ever get tested as we look for 921 // MS platform Cmaps and MS states (in the Opentype spec on their website) 922 // that MS doesn't support this format 923 static class CMapFormat10 extends CMap { 924 925 long firstCode; 926 int entryCount; 927 char[] glyphIdArray; 928 929 CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) { 930 931 bbuffer.position(offset+12); 932 firstCode = bbuffer.getInt() & INTMASK; 933 entryCount = bbuffer.getInt() & INTMASK; 934 // each glyph is a uint16, so 2 bytes per value. 935 if (bbuffer.remaining() < (2 * (long)entryCount)) { 936 throw new RuntimeException("Format 10 table exceeded"); 937 } 938 CharBuffer buffer = bbuffer.asCharBuffer(); 939 glyphIdArray = new char[entryCount]; 940 for (int i=0; i< entryCount; i++) { 941 glyphIdArray[i] = buffer.get(); 942 } 943 } 944 945 char getGlyph(int charCode) { 946 947 if (xlat != null) { 948 throw new RuntimeException("xlat array for cmap fmt=10"); 949 } 950 951 int code = (int)(charCode - firstCode); 952 if (code < 0 || code >= entryCount) { 953 return 0; 954 } else { 955 return glyphIdArray[code]; 956 } 957 } 958 } 959 960 // Format 12: Segmented coverage for UCS-4 (fonts supporting 961 // surrogate pairs) 962 static class CMapFormat12 extends CMap { 963 964 int numGroups; 965 int highBit =0; 966 int power; 967 int extra; 968 long[] startCharCode; 969 long[] endCharCode; 970 int[] startGlyphID; 971 972 CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) { 973 if (xlat != null) { 974 throw new RuntimeException("xlat array for cmap fmt=12"); 975 } 976 977 buffer.position(offset+12); 978 numGroups = buffer.getInt() & INTMASK; 979 // A map group record is three uint32's making for 12 bytes total 980 if (buffer.remaining() < (12 * (long)numGroups)) { 981 throw new RuntimeException("Format 12 table exceeded"); 982 } 983 startCharCode = new long[numGroups]; 984 endCharCode = new long[numGroups]; 985 startGlyphID = new int[numGroups]; 986 buffer = buffer.slice(); 987 IntBuffer ibuffer = buffer.asIntBuffer(); 988 for (int i=0; i<numGroups; i++) { 989 startCharCode[i] = ibuffer.get() & INTMASK; 990 endCharCode[i] = ibuffer.get() & INTMASK; 991 startGlyphID[i] = ibuffer.get() & INTMASK; 992 } 993 994 /* Finds the high bit by binary searching through the bits */ 995 int value = numGroups; 996 997 if (value >= 1 << 16) { 998 value >>= 16; 999 highBit += 16; 1000 } 1001 1002 if (value >= 1 << 8) { 1003 value >>= 8; 1004 highBit += 8; 1005 } 1006 1007 if (value >= 1 << 4) { 1008 value >>= 4; 1009 highBit += 4; 1010 } 1011 1012 if (value >= 1 << 2) { 1013 value >>= 2; 1014 highBit += 2; 1015 } 1016 1017 if (value >= 1 << 1) { 1018 value >>= 1; 1019 highBit += 1; 1020 } 1021 1022 power = 1 << highBit; 1023 extra = numGroups - power; 1024 } 1025 1026 char getGlyph(int charCode) { 1027 final int origCharCode = charCode; 1028 int controlGlyph = getControlCodeGlyph(charCode, false); 1029 if (controlGlyph >= 0) { 1030 return (char)controlGlyph; 1031 } 1032 int probe = power; 1033 int range = 0; 1034 1035 if (startCharCode[extra] <= charCode) { 1036 range = extra; 1037 } 1038 1039 while (probe > 1) { 1040 probe >>= 1; 1041 1042 if (startCharCode[range+probe] <= charCode) { 1043 range += probe; 1044 } 1045 } 1046 1047 if (startCharCode[range] <= charCode && 1048 endCharCode[range] >= charCode) { 1049 return (char) 1050 (startGlyphID[range] + (charCode - startCharCode[range])); 1051 } 1052 1053 return getFormatCharGlyph(origCharCode); 1054 } 1055 1056 } 1057 1058 /* Used to substitute for bad Cmaps. */ 1059 static class NullCMapClass extends CMap { 1060 1061 char getGlyph(int charCode) { 1062 return 0; 1063 } 1064 } 1065 1066 public static final NullCMapClass theNullCmap = new NullCMapClass(); 1067 1068 final int getControlCodeGlyph(int charCode, boolean noSurrogates) { 1069 if (charCode < 0x0010) { 1070 switch (charCode) { 1071 case 0x0009: 1072 case 0x000a: 1073 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; 1074 } 1075 } else if (noSurrogates && charCode >= 0xFFFF) { 1076 return 0; 1077 } 1078 return -1; 1079 } 1080 1081 final char getFormatCharGlyph(int charCode) { 1082 if (charCode >= 0x200c) { 1083 if ((charCode <= 0x200f) || 1084 (charCode >= 0x2028 && charCode <= 0x202e) || 1085 (charCode >= 0x206a && charCode <= 0x206f)) { 1086 return (char)CharToGlyphMapper.INVISIBLE_GLYPH_ID; 1087 } 1088 } 1089 return 0; 1090 } 1091 1092 static class UVS { 1093 int numSelectors; 1094 int[] selector; 1095 1096 //for Non-Default UVS Table 1097 int[] numUVSMapping; 1098 int[][] unicodeValue; 1099 char[][] glyphID; 1100 1101 UVS(ByteBuffer buffer, int offset) { 1102 buffer.position(offset+6); 1103 numSelectors = buffer.getInt() & INTMASK; 1104 // A variation selector record is one 3 byte int + two int32's 1105 // making for 11 bytes per record. 1106 if (buffer.remaining() < (11 * (long)numSelectors)) { 1107 throw new RuntimeException("Variations exceed buffer"); 1108 } 1109 selector = new int[numSelectors]; 1110 numUVSMapping = new int[numSelectors]; 1111 unicodeValue = new int[numSelectors][]; 1112 glyphID = new char[numSelectors][]; 1113 1114 for (int i = 0; i < numSelectors; i++) { 1115 buffer.position(offset + 10 + i * 11); 1116 selector[i] = (buffer.get() & 0xff) << 16; //UINT24 1117 selector[i] += (buffer.get() & 0xff) << 8; 1118 selector[i] += buffer.get() & 0xff; 1119 1120 //skip Default UVS Table 1121 1122 //for Non-Default UVS Table 1123 int tableOffset = buffer.getInt(offset + 10 + i * 11 + 7); 1124 if (tableOffset == 0) { 1125 numUVSMapping[i] = 0; 1126 } else if (tableOffset > 0) { 1127 buffer.position(offset+tableOffset); 1128 numUVSMapping[i] = buffer.getInt() & INTMASK; 1129 // a UVS mapping record is one 3 byte int + uint16 1130 // making for 5 bytes per record. 1131 if (buffer.remaining() < (5 * (long)numUVSMapping[i])) { 1132 throw new RuntimeException("Variations exceed buffer"); 1133 } 1134 unicodeValue[i] = new int[numUVSMapping[i]]; 1135 glyphID[i] = new char[numUVSMapping[i]]; 1136 1137 for (int j = 0; j < numUVSMapping[i]; j++) { 1138 int temp = (buffer.get() & 0xff) << 16; //UINT24 1139 temp += (buffer.get() & 0xff) << 8; 1140 temp += buffer.get() & 0xff; 1141 unicodeValue[i][j] = temp; 1142 glyphID[i][j] = buffer.getChar(); 1143 } 1144 } 1145 } 1146 } 1147 1148 static final int VS_NOGLYPH = 0; 1149 private int getGlyph(int charCode, int variationSelector) { 1150 int targetSelector = -1; 1151 for (int i = 0; i < numSelectors; i++) { 1152 if (selector[i] == variationSelector) { 1153 targetSelector = i; 1154 break; 1155 } 1156 } 1157 if (targetSelector == -1) { 1158 return VS_NOGLYPH; 1159 } 1160 if (numUVSMapping[targetSelector] > 0) { 1161 int index = java.util.Arrays.binarySearch( 1162 unicodeValue[targetSelector], charCode); 1163 if (index >= 0) { 1164 return glyphID[targetSelector][index]; 1165 } 1166 } 1167 return VS_NOGLYPH; 1168 } 1169 } 1170 1171 char getVariationGlyph(int charCode, int variationSelector) { 1172 char glyph = 0; 1173 if (uvs == null) { 1174 glyph = getGlyph(charCode); 1175 } else { 1176 int result = uvs.getGlyph(charCode, variationSelector); 1177 if (result > 0) { 1178 glyph = (char)(result & 0xFFFF); 1179 } else { 1180 glyph = getGlyph(charCode); 1181 } 1182 } 1183 return glyph; 1184 } 1185 }