1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.font; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.IntBuffer; 31 import java.util.Locale; 32 import java.nio.charset.*; 33 34 /* 35 * A tt font has a CMAP table which is in turn made up of sub-tables which 36 * describe the char to glyph mapping in (possibly) multiple ways. 37 * CMAP subtables are described by 3 values. 38 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK) 39 * 2. Encoding (eg 0=symbol, 1=unicode) 40 * 3. TrueType subtable format (how the char->glyph mapping for the encoding 41 * is stored in the subtable). See the TrueType spec. Format 4 is required 42 * by MS in fonts for windows. Its uses segmented mapping to delta values. 43 * Most typically we see are (3,1,4) : 44 * CMAP Platform ID=3 is what we use. 45 * Encodings that are used in practice by JDK on Solaris are 46 * symbol (3,0) 47 * unicode (3,1) 48 * GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5) 49 * The format for almost all subtables is 4. However the solaris (3,5) 50 * encodings are typically in format 2. 51 */ 52 abstract class CMap { 53 54 // static char WingDings_b2c[] = { 55 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 56 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 57 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 58 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 59 // 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 60 // 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 61 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 62 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d, 63 // 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 64 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 65 // 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd, 66 // 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 67 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 68 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 69 // 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd, 70 // 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd, 71 // 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 72 // 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d, 73 // 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd, 74 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 75 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd, 76 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735, 77 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd, 78 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 79 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd, 80 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 81 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 82 // 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd, 83 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 84 // 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 85 // 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 86 // 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd, 87 // }; 88 89 // static char Symbols_b2c[] = { 90 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 91 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 92 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 93 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 94 // 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d, 95 // 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd, 96 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 97 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 98 // 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393, 99 // 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, 100 // 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9, 101 // 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd, 102 // 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3, 103 // 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf, 104 // 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9, 105 // 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd, 106 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 107 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 108 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 109 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 110 // 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd, 111 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 112 // 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219, 113 // 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd, 114 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229, 115 // 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, 116 // 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5, 117 // 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 118 // 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd, 119 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 120 // 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 121 // 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 122 // }; 123 124 static final short ShiftJISEncoding = 2; 125 static final short GBKEncoding = 3; 126 static final short Big5Encoding = 4; 127 static final short WansungEncoding = 5; 128 static final short JohabEncoding = 6; 129 static final short MSUnicodeSurrogateEncoding = 10; 130 131 static final char noSuchChar = (char)0xfffd; 132 static final int SHORTMASK = 0x0000ffff; 133 static final int INTMASK = 0x7fffffff; 134 135 static final char[][] converterMaps = new char[7][]; 136 137 /* 138 * Unicode->other encoding translation array. A pre-computed look up 139 * which can be shared across all fonts using that encoding. 140 * Using this saves running character coverters repeatedly. 141 */ 142 char[] xlat; 143 UVS uvs = null; 144 145 static CMap initialize(TrueTypeFont font) { 146 147 CMap cmap = null; 148 149 int offset, platformID, encodingID=-1; 150 151 int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0, 152 three6=0, three10=0; 153 int zero5 = 0; // for Unicode Variation Sequences 154 boolean threeStar = false; 155 156 ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag); 157 int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag); 158 short numberSubTables = cmapBuffer.getShort(2); 159 160 /* locate the offsets of all 3,* (ie Microsoft platform) encodings */ 161 for (int i=0; i<numberSubTables; i++) { 162 cmapBuffer.position(i * 8 + 4); 163 platformID = cmapBuffer.getShort(); 164 if (platformID == 3) { 165 threeStar = true; 166 encodingID = cmapBuffer.getShort(); 167 offset = cmapBuffer.getInt(); 168 switch (encodingID) { 169 case 0: three0 = offset; break; // MS Symbol encoding 170 case 1: three1 = offset; break; // MS Unicode cmap 171 case 2: three2 = offset; break; // ShiftJIS cmap. 172 case 3: three3 = offset; break; // GBK cmap 173 case 4: three4 = offset; break; // Big 5 cmap 174 case 5: three5 = offset; break; // Wansung 175 case 6: three6 = offset; break; // Johab 176 case 10: three10 = offset; break; // MS Unicode surrogates 177 } 178 } else if (platformID == 0) { 179 encodingID = cmapBuffer.getShort(); 180 offset = cmapBuffer.getInt(); 181 if (encodingID == 5) { 182 zero5 = offset; 183 } 184 } 185 } 186 187 /* This defines the preference order for cmap subtables */ 188 if (threeStar) { 189 if (three10 != 0) { 190 cmap = createCMap(cmapBuffer, three10, null); 191 } 192 else if (three0 != 0) { 193 /* The special case treatment of these fonts leads to 194 * anomalies where a user can view "wingdings" and "wingdings2" 195 * and the latter shows all its code points in the unicode 196 * private use area at 0xF000->0XF0FF and the former shows 197 * a scattered subset of its glyphs that are known mappings to 198 * unicode code points. 199 * The primary purpose of these mappings was to facilitate 200 * display of symbol chars etc in composite fonts, however 201 * this is not needed as all these code points are covered 202 * by some other platform symbol font. 203 * Commenting this out reduces the role of these two files 204 * (assuming that they continue to be used in font.properties) 205 * to just one of contributing to the overall composite 206 * font metrics, and also AWT can still access the fonts. 207 * Clients which explicitly accessed these fonts as names 208 * "Symbol" and "Wingdings" (ie as physical fonts) and 209 * expected to see a scattering of these characters will 210 * see them now as missing. How much of a problem is this? 211 * Perhaps we could still support this mapping just for 212 * "Symbol.ttf" but I suspect some users would prefer it 213 * to be mapped in to the Latin range as that is how 214 * the "symbol" font is used in native apps. 215 */ 216 // String name = font.platName.toLowerCase(Locale.ENGLISH); 217 // if (name.endsWith("symbol.ttf")) { 218 // cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c); 219 // } else if (name.endsWith("wingding.ttf")) { 220 // cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c); 221 // } else { 222 cmap = createCMap(cmapBuffer, three0, null); 223 // } 224 } 225 else if (three1 != 0) { 226 cmap = createCMap(cmapBuffer, three1, null); 227 } 228 else if (three2 != 0) { 229 cmap = createCMap(cmapBuffer, three2, 230 getConverterMap(ShiftJISEncoding)); 231 } 232 else if (three3 != 0) { 233 cmap = createCMap(cmapBuffer, three3, 234 getConverterMap(GBKEncoding)); 235 } 236 else if (three4 != 0) { 237 cmap = createCMap(cmapBuffer, three4, 238 getConverterMap(Big5Encoding)); 239 } 240 else if (three5 != 0) { 241 cmap = createCMap(cmapBuffer, three5, 242 getConverterMap(WansungEncoding)); 243 } 244 else if (three6 != 0) { 245 cmap = createCMap(cmapBuffer, three6, 246 getConverterMap(JohabEncoding)); 247 } 248 } else { 249 /* No 3,* subtable was found. Just use whatever is the first 250 * table listed. Not very useful but maybe better than 251 * rejecting the font entirely? 252 */ 253 cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null); 254 } 255 // For Unicode Variation Sequences 256 if (cmap != null && zero5 != 0) { 257 cmap.createUVS(cmapBuffer, zero5); 258 } 259 return cmap; 260 } 261 262 /* speed up the converting by setting the range for double 263 * byte characters; 264 */ 265 static char[] getConverter(short encodingID) { 266 int dBegin = 0x8000; 267 int dEnd = 0xffff; 268 String encoding; 269 270 switch (encodingID) { 271 case ShiftJISEncoding: 272 dBegin = 0x8140; 273 dEnd = 0xfcfc; 274 encoding = "SJIS"; 275 break; 276 case GBKEncoding: 277 dBegin = 0x8140; 278 dEnd = 0xfea0; 279 encoding = "GBK"; 280 break; 281 case Big5Encoding: 282 dBegin = 0xa140; 283 dEnd = 0xfefe; 284 encoding = "Big5"; 285 break; 286 case WansungEncoding: 287 dBegin = 0xa1a1; 288 dEnd = 0xfede; 289 encoding = "EUC_KR"; 290 break; 291 case JohabEncoding: 292 dBegin = 0x8141; 293 dEnd = 0xfdfe; 294 encoding = "Johab"; 295 break; 296 default: 297 return null; 298 } 299 300 try { 301 char[] convertedChars = new char[65536]; 302 for (int i=0; i<65536; i++) { 303 convertedChars[i] = noSuchChar; 304 } 305 306 byte[] inputBytes = new byte[(dEnd-dBegin+1)*2]; 307 char[] outputChars = new char[(dEnd-dBegin+1)]; 308 309 int j = 0; 310 int firstByte; 311 if (encodingID == ShiftJISEncoding) { 312 for (int i = dBegin; i <= dEnd; i++) { 313 firstByte = (i >> 8 & 0xff); 314 if (firstByte >= 0xa1 && firstByte <= 0xdf) { 315 //sjis halfwidth katakana 316 inputBytes[j++] = (byte)0xff; 317 inputBytes[j++] = (byte)0xff; 318 } else { 319 inputBytes[j++] = (byte)firstByte; 320 inputBytes[j++] = (byte)(i & 0xff); 321 } 322 } 323 } else { 324 for (int i = dBegin; i <= dEnd; i++) { 325 inputBytes[j++] = (byte)(i>>8 & 0xff); 326 inputBytes[j++] = (byte)(i & 0xff); 327 } 328 } 329 330 Charset.forName(encoding).newDecoder() 331 .onMalformedInput(CodingErrorAction.REPLACE) 332 .onUnmappableCharacter(CodingErrorAction.REPLACE) 333 .replaceWith("\u0000") 334 .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length), 335 CharBuffer.wrap(outputChars, 0, outputChars.length), 336 true); 337 338 // ensure single byte ascii 339 for (int i = 0x20; i <= 0x7e; i++) { 340 convertedChars[i] = (char)i; 341 } 342 343 //sjis halfwidth katakana 344 if (encodingID == ShiftJISEncoding) { 345 for (int i = 0xa1; i <= 0xdf; i++) { 346 convertedChars[i] = (char)(i - 0xa1 + 0xff61); 347 } 348 } 349 350 /* It would save heap space (approx 60Kbytes for each of these 351 * converters) if stored only valid ranges (ie returned 352 * outputChars directly. But this is tricky since want to 353 * include the ASCII range too. 354 */ 355 // System.err.println("oc.len="+outputChars.length); 356 // System.err.println("cc.len="+convertedChars.length); 357 // System.err.println("dbegin="+dBegin); 358 System.arraycopy(outputChars, 0, convertedChars, dBegin, 359 outputChars.length); 360 361 //return convertedChars; 362 /* invert this map as now want it to map from Unicode 363 * to other encoding. 364 */ 365 char [] invertedChars = new char[65536]; 366 for (int i=0;i<65536;i++) { 367 if (convertedChars[i] != noSuchChar) { 368 invertedChars[convertedChars[i]] = (char)i; 369 } 370 } 371 return invertedChars; 372 373 } catch (Exception e) { 374 e.printStackTrace(); 375 } 376 return null; 377 } 378 379 /* 380 * The returned array maps to unicode from some other 2 byte encoding 381 * eg for a 2byte index which represents a SJIS char, the indexed 382 * value is the corresponding unicode char. 383 */ 384 static char[] getConverterMap(short encodingID) { 385 if (converterMaps[encodingID] == null) { 386 converterMaps[encodingID] = getConverter(encodingID); 387 } 388 return converterMaps[encodingID]; 389 } 390 391 392 static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) { 393 /* First do a sanity check that this cmap subtable is contained 394 * within the cmap table. 395 */ 396 int subtableFormat = buffer.getChar(offset); 397 long subtableLength; 398 if (subtableFormat < 8) { 399 subtableLength = buffer.getChar(offset+2); 400 } else { 401 subtableLength = buffer.getInt(offset+4) & INTMASK; 402 } 403 if (offset+subtableLength > buffer.capacity()) { 404 if (FontUtilities.isLogging()) { 405 FontUtilities.getLogger().warning("Cmap subtable overflows buffer."); 406 } 407 } 408 switch (subtableFormat) { 409 case 0: return new CMapFormat0(buffer, offset); 410 case 2: return new CMapFormat2(buffer, offset, xlat); 411 case 4: return new CMapFormat4(buffer, offset, xlat); 412 case 6: return new CMapFormat6(buffer, offset, xlat); 413 case 8: return new CMapFormat8(buffer, offset, xlat); 414 case 10: return new CMapFormat10(buffer, offset, xlat); 415 case 12: return new CMapFormat12(buffer, offset, xlat); 416 default: throw new RuntimeException("Cmap format unimplemented: " + 417 (int)buffer.getChar(offset)); 418 } 419 } 420 421 private void createUVS(ByteBuffer buffer, int offset) { 422 int subtableFormat = buffer.getChar(offset); 423 if (subtableFormat == 14) { 424 long subtableLength = buffer.getInt(offset + 2) & INTMASK; 425 if (offset + subtableLength > buffer.capacity()) { 426 if (FontUtilities.isLogging()) { 427 FontUtilities.getLogger() 428 .warning("Cmap UVS subtable overflows buffer."); 429 } 430 } 431 try { 432 this.uvs = new UVS(buffer, offset); 433 } catch (Throwable t) { 434 t.printStackTrace(); 435 } 436 } 437 return; 438 } 439 440 /* 441 final char charVal(byte[] cmap, int index) { 442 return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); 443 } 444 445 final short shortVal(byte[] cmap, int index) { 446 return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1])); 447 } 448 */ 449 abstract char getGlyph(int charCode); 450 451 /* Format 4 Header is 452 * ushort format (off=0) 453 * ushort length (off=2) 454 * ushort language (off=4) 455 * ushort segCountX2 (off=6) 456 * ushort searchRange (off=8) 457 * ushort entrySelector (off=10) 458 * ushort rangeShift (off=12) 459 * ushort endCount[segCount] (off=14) 460 * ushort reservedPad 461 * ushort startCount[segCount] 462 * short idDelta[segCount] 463 * idRangeOFfset[segCount] 464 * ushort glyphIdArray[] 465 */ 466 static class CMapFormat4 extends CMap { 467 int segCount; 468 int entrySelector; 469 int rangeShift; 470 char[] endCount; 471 char[] startCount; 472 short[] idDelta; 473 char[] idRangeOffset; 474 char[] glyphIds; 475 476 CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) { 477 478 this.xlat = xlat; 479 480 bbuffer.position(offset); 481 CharBuffer buffer = bbuffer.asCharBuffer(); 482 buffer.get(); // skip, we already know format=4 483 int subtableLength = buffer.get(); 484 /* Try to recover from some bad fonts which specify a subtable 485 * length that would overflow the byte buffer holding the whole 486 * cmap table. If this isn't a recoverable situation an exception 487 * may be thrown which is caught higher up the call stack. 488 * Whilst this may seem lenient, in practice, unless the "bad" 489 * subtable we are using is the last one in the cmap table we 490 * would have no way of knowing about this problem anyway. 491 */ 492 if (offset+subtableLength > bbuffer.capacity()) { 493 subtableLength = bbuffer.capacity() - offset; 494 } 495 buffer.get(); // skip language 496 segCount = buffer.get()/2; 497 int searchRange = buffer.get(); 498 entrySelector = buffer.get(); 499 rangeShift = buffer.get()/2; 500 startCount = new char[segCount]; 501 endCount = new char[segCount]; 502 idDelta = new short[segCount]; 503 idRangeOffset = new char[segCount]; 504 505 for (int i=0; i<segCount; i++) { 506 endCount[i] = buffer.get(); 507 } 508 buffer.get(); // 2 bytes for reserved pad 509 for (int i=0; i<segCount; i++) { 510 startCount[i] = buffer.get(); 511 } 512 513 for (int i=0; i<segCount; i++) { 514 idDelta[i] = (short)buffer.get(); 515 } 516 517 for (int i=0; i<segCount; i++) { 518 char ctmp = buffer.get(); 519 idRangeOffset[i] = (char)((ctmp>>1)&0xffff); 520 } 521 /* Can calculate the number of glyph IDs by subtracting 522 * "pos" from the length of the cmap 523 */ 524 int pos = (segCount*8+16)/2; 525 buffer.position(pos); 526 int numGlyphIds = (subtableLength/2 - pos); 527 glyphIds = new char[numGlyphIds]; 528 for (int i=0;i<numGlyphIds;i++) { 529 glyphIds[i] = buffer.get(); 530 } 531 /* 532 System.err.println("segcount="+segCount); 533 System.err.println("entrySelector="+entrySelector); 534 System.err.println("rangeShift="+rangeShift); 535 for (int j=0;j<segCount;j++) { 536 System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+ 537 " ec="+(int)(endCount[j]&0xffff)+ 538 " delta="+idDelta[j] + 539 " ro="+(int)idRangeOffset[j]); 540 } 541 542 //System.err.println("numglyphs="+glyphIds.length); 543 for (int i=0;i<numGlyphIds;i++) { 544 System.err.println("gid["+i+"]="+(int)glyphIds[i]); 545 } 546 */ 547 } 548 549 char getGlyph(int charCode) { 550 551 final int origCharCode = charCode; 552 int index = 0; 553 char glyphCode = 0; 554 555 int controlGlyph = getControlCodeGlyph(charCode, true); 556 if (controlGlyph >= 0) { 557 return (char)controlGlyph; 558 } 559 560 /* presence of translation array indicates that this 561 * cmap is in some other (non-unicode encoding). 562 * In order to look-up a char->glyph mapping we need to 563 * translate the unicode code point to the encoding of 564 * the cmap. 565 * REMIND: VALID CHARCODES?? 566 */ 567 if (xlat != null) { 568 charCode = xlat[charCode]; 569 } 570 571 /* 572 * Citation from the TrueType (and OpenType) spec: 573 * The segments are sorted in order of increasing endCode 574 * values, and the segment values are specified in four parallel 575 * arrays. You search for the first endCode that is greater than 576 * or equal to the character code you want to map. If the 577 * corresponding startCode is less than or equal to the 578 * character code, then you use the corresponding idDelta and 579 * idRangeOffset to map the character code to a glyph index 580 * (otherwise, the missingGlyph is returned). 581 */ 582 583 /* 584 * CMAP format4 defines several fields for optimized search of 585 * the segment list (entrySelector, searchRange, rangeShift). 586 * However, benefits are neglible and some fonts have incorrect 587 * data - so we use straightforward binary search (see bug 6247425) 588 */ 589 int left = 0, right = startCount.length; 590 index = startCount.length >> 1; 591 while (left < right) { 592 if (endCount[index] < charCode) { 593 left = index + 1; 594 } else { 595 right = index; 596 } 597 index = (left + right) >> 1; 598 } 599 600 if (charCode >= startCount[index] && charCode <= endCount[index]) { 601 int rangeOffset = idRangeOffset[index]; 602 603 if (rangeOffset == 0) { 604 glyphCode = (char)(charCode + idDelta[index]); 605 } else { 606 /* Calculate an index into the glyphIds array */ 607 608 /* 609 System.err.println("rangeoffset="+rangeOffset+ 610 " charCode=" + charCode + 611 " scnt["+index+"]="+(int)startCount[index] + 612 " segCnt="+segCount); 613 */ 614 615 int glyphIDIndex = rangeOffset - segCount + index 616 + (charCode - startCount[index]); 617 glyphCode = glyphIds[glyphIDIndex]; 618 if (glyphCode != 0) { 619 glyphCode = (char)(glyphCode + idDelta[index]); 620 } 621 } 622 } 623 if (glyphCode == 0) { 624 glyphCode = getFormatCharGlyph(origCharCode); 625 } 626 return glyphCode; 627 } 628 } 629 630 // Format 0: Byte Encoding table 631 static class CMapFormat0 extends CMap { 632 byte [] cmap; 633 634 CMapFormat0(ByteBuffer buffer, int offset) { 635 636 /* skip 6 bytes of format, length, and version */ 637 int len = buffer.getChar(offset+2); 638 cmap = new byte[len-6]; 639 buffer.position(offset+6); 640 buffer.get(cmap); 641 } 642 643 char getGlyph(int charCode) { 644 if (charCode < 256) { 645 if (charCode < 0x0010) { 646 switch (charCode) { 647 case 0x0009: 648 case 0x000a: 649 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; 650 } 651 } 652 return (char)(0xff & cmap[charCode]); 653 } else { 654 return 0; 655 } 656 } 657 } 658 659 // static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) { 660 661 // CMap cmap = createCMap(buffer, offset, null); 662 // if (cmap == null) { 663 // return null; 664 // } else { 665 // return new CMapFormatSymbol(cmap, syms); 666 // } 667 // } 668 669 // static class CMapFormatSymbol extends CMap { 670 671 // CMap cmap; 672 // static final int NUM_BUCKETS = 128; 673 // Bucket[] buckets = new Bucket[NUM_BUCKETS]; 674 675 // class Bucket { 676 // char unicode; 677 // char glyph; 678 // Bucket next; 679 680 // Bucket(char u, char g) { 681 // unicode = u; 682 // glyph = g; 683 // } 684 // } 685 686 // CMapFormatSymbol(CMap cmap, char[] syms) { 687 688 // this.cmap = cmap; 689 690 // for (int i=0;i<syms.length;i++) { 691 // char unicode = syms[i]; 692 // if (unicode != noSuchChar) { 693 // char glyph = cmap.getGlyph(i + 0xf000); 694 // int hash = unicode % NUM_BUCKETS; 695 // Bucket bucket = new Bucket(unicode, glyph); 696 // if (buckets[hash] == null) { 697 // buckets[hash] = bucket; 698 // } else { 699 // Bucket b = buckets[hash]; 700 // while (b.next != null) { 701 // b = b.next; 702 // } 703 // b.next = bucket; 704 // } 705 // } 706 // } 707 // } 708 709 // char getGlyph(int unicode) { 710 // if (unicode >= 0x1000) { 711 // return 0; 712 // } 713 // else if (unicode >=0xf000 && unicode < 0xf100) { 714 // return cmap.getGlyph(unicode); 715 // } else { 716 // Bucket b = buckets[unicode % NUM_BUCKETS]; 717 // while (b != null) { 718 // if (b.unicode == unicode) { 719 // return b.glyph; 720 // } else { 721 // b = b.next; 722 // } 723 // } 724 // return 0; 725 // } 726 // } 727 // } 728 729 // Format 2: High-byte mapping through table 730 static class CMapFormat2 extends CMap { 731 732 char[] subHeaderKey = new char[256]; 733 /* Store subheaders in individual arrays 734 * A SubHeader entry theortically looks like { 735 * char firstCode; 736 * char entryCount; 737 * short idDelta; 738 * char idRangeOffset; 739 * } 740 */ 741 char[] firstCodeArray; 742 char[] entryCountArray; 743 short[] idDeltaArray; 744 char[] idRangeOffSetArray; 745 746 char[] glyphIndexArray; 747 748 CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) { 749 750 this.xlat = xlat; 751 752 int tableLen = buffer.getChar(offset+2); 753 buffer.position(offset+6); 754 CharBuffer cBuffer = buffer.asCharBuffer(); 755 char maxSubHeader = 0; 756 for (int i=0;i<256;i++) { 757 subHeaderKey[i] = cBuffer.get(); 758 if (subHeaderKey[i] > maxSubHeader) { 759 maxSubHeader = subHeaderKey[i]; 760 } 761 } 762 /* The value of the subHeaderKey is 8 * the subHeader index, 763 * so the number of subHeaders can be obtained by dividing 764 * this value bv 8 and adding 1. 765 */ 766 int numSubHeaders = (maxSubHeader >> 3) +1; 767 firstCodeArray = new char[numSubHeaders]; 768 entryCountArray = new char[numSubHeaders]; 769 idDeltaArray = new short[numSubHeaders]; 770 idRangeOffSetArray = new char[numSubHeaders]; 771 for (int i=0; i<numSubHeaders; i++) { 772 firstCodeArray[i] = cBuffer.get(); 773 entryCountArray[i] = cBuffer.get(); 774 idDeltaArray[i] = (short)cBuffer.get(); 775 idRangeOffSetArray[i] = cBuffer.get(); 776 // System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+ 777 // " ec="+(int)entryCountArray[i]+ 778 // " delta="+(int)idDeltaArray[i]+ 779 // " offset="+(int)idRangeOffSetArray[i]); 780 } 781 782 int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2; 783 glyphIndexArray = new char[glyphIndexArrSize]; 784 for (int i=0; i<glyphIndexArrSize;i++) { 785 glyphIndexArray[i] = cBuffer.get(); 786 } 787 } 788 789 char getGlyph(int charCode) { 790 final int origCharCode = charCode; 791 int controlGlyph = getControlCodeGlyph(charCode, true); 792 if (controlGlyph >= 0) { 793 return (char)controlGlyph; 794 } 795 796 if (xlat != null) { 797 charCode = xlat[charCode]; 798 } 799 800 char highByte = (char)(charCode >> 8); 801 char lowByte = (char)(charCode & 0xff); 802 int key = subHeaderKey[highByte]>>3; // index into subHeaders 803 char mapMe; 804 805 if (key != 0) { 806 mapMe = lowByte; 807 } else { 808 mapMe = highByte; 809 if (mapMe == 0) { 810 mapMe = lowByte; 811 } 812 } 813 814 // System.err.println("charCode="+Integer.toHexString(charCode)+ 815 // " key="+key+ " mapMe="+Integer.toHexString(mapMe)); 816 char firstCode = firstCodeArray[key]; 817 if (mapMe < firstCode) { 818 return 0; 819 } else { 820 mapMe -= firstCode; 821 } 822 823 if (mapMe < entryCountArray[key]) { 824 /* "address" arithmetic is needed to calculate the offset 825 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies 826 * the number of bytes from that location in the table where 827 * the subarray of glyphIndexes starting at "firstCode" begins. 828 * Each entry in the subHeader table is 8 bytes, and the 829 * idRangeOffSetArray field is at offset 6 in the entry. 830 * The glyphIndexArray immediately follows the subHeaders. 831 * So if there are "N" entries then the number of bytes to the 832 * start of glyphIndexArray is (N-key)*8-6. 833 * Subtract this from the idRangeOffSetArray value to get 834 * the number of bytes into glyphIndexArray and divide by 2 to 835 * get the (char) array index. 836 */ 837 int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6; 838 int glyphSubArrayStart = 839 (idRangeOffSetArray[key] - glyphArrayOffset)/2; 840 char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe]; 841 if (glyphCode != 0) { 842 glyphCode += idDeltaArray[key]; //idDelta 843 return glyphCode; 844 } 845 } 846 return getFormatCharGlyph(origCharCode); 847 } 848 } 849 850 // Format 6: Trimmed table mapping 851 static class CMapFormat6 extends CMap { 852 853 char firstCode; 854 char entryCount; 855 char[] glyphIdArray; 856 857 CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) { 858 859 bbuffer.position(offset+6); 860 CharBuffer buffer = bbuffer.asCharBuffer(); 861 firstCode = buffer.get(); 862 entryCount = buffer.get(); 863 glyphIdArray = new char[entryCount]; 864 for (int i=0; i< entryCount; i++) { 865 glyphIdArray[i] = buffer.get(); 866 } 867 } 868 869 char getGlyph(int charCode) { 870 final int origCharCode = charCode; 871 int controlGlyph = getControlCodeGlyph(charCode, true); 872 if (controlGlyph >= 0) { 873 return (char)controlGlyph; 874 } 875 876 if (xlat != null) { 877 charCode = xlat[charCode]; 878 } 879 880 charCode -= firstCode; 881 if (charCode < 0 || charCode >= entryCount) { 882 return getFormatCharGlyph(origCharCode); 883 } else { 884 return glyphIdArray[charCode]; 885 } 886 } 887 } 888 889 // Format 8: mixed 16-bit and 32-bit coverage 890 // Seems unlikely this code will ever get tested as we look for 891 // MS platform Cmaps and MS states (in the Opentype spec on their website) 892 // that MS doesn't support this format 893 static class CMapFormat8 extends CMap { 894 byte[] is32 = new byte[8192]; 895 int nGroups; 896 int[] startCharCode; 897 int[] endCharCode; 898 int[] startGlyphID; 899 900 CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) { 901 902 bbuffer.position(12); 903 bbuffer.get(is32); 904 nGroups = bbuffer.getInt() & INTMASK; 905 // A map group record is three uint32's making for 12 bytes total 906 if (bbuffer.remaining() < (12 * (long)nGroups)) { 907 throw new RuntimeException("Format 8 table exceeded"); 908 } 909 startCharCode = new int[nGroups]; 910 endCharCode = new int[nGroups]; 911 startGlyphID = new int[nGroups]; 912 } 913 914 char getGlyph(int charCode) { 915 if (xlat != null) { 916 throw new RuntimeException("xlat array for cmap fmt=8"); 917 } 918 return 0; 919 } 920 921 } 922 923 924 // Format 4-byte 10: Trimmed table mapping 925 // Seems unlikely this code will ever get tested as we look for 926 // MS platform Cmaps and MS states (in the Opentype spec on their website) 927 // that MS doesn't support this format 928 static class CMapFormat10 extends CMap { 929 930 long firstCode; 931 int entryCount; 932 char[] glyphIdArray; 933 934 CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) { 935 936 bbuffer.position(offset+12); 937 firstCode = bbuffer.getInt() & INTMASK; 938 entryCount = bbuffer.getInt() & INTMASK; 939 // each glyph is a uint16, so 2 bytes per value. 940 if (bbuffer.remaining() < (2 * (long)entryCount)) { 941 throw new RuntimeException("Format 10 table exceeded"); 942 } 943 CharBuffer buffer = bbuffer.asCharBuffer(); 944 glyphIdArray = new char[entryCount]; 945 for (int i=0; i< entryCount; i++) { 946 glyphIdArray[i] = buffer.get(); 947 } 948 } 949 950 char getGlyph(int charCode) { 951 952 if (xlat != null) { 953 throw new RuntimeException("xlat array for cmap fmt=10"); 954 } 955 956 int code = (int)(charCode - firstCode); 957 if (code < 0 || code >= entryCount) { 958 return 0; 959 } else { 960 return glyphIdArray[code]; 961 } 962 } 963 } 964 965 // Format 12: Segmented coverage for UCS-4 (fonts supporting 966 // surrogate pairs) 967 static class CMapFormat12 extends CMap { 968 969 int numGroups; 970 int highBit =0; 971 int power; 972 int extra; 973 long[] startCharCode; 974 long[] endCharCode; 975 int[] startGlyphID; 976 977 CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) { 978 if (xlat != null) { 979 throw new RuntimeException("xlat array for cmap fmt=12"); 980 } 981 982 buffer.position(offset+12); 983 numGroups = buffer.getInt() & INTMASK; 984 // A map group record is three uint32's making for 12 bytes total 985 if (buffer.remaining() < (12 * (long)numGroups)) { 986 throw new RuntimeException("Format 12 table exceeded"); 987 } 988 startCharCode = new long[numGroups]; 989 endCharCode = new long[numGroups]; 990 startGlyphID = new int[numGroups]; 991 buffer = buffer.slice(); 992 IntBuffer ibuffer = buffer.asIntBuffer(); 993 for (int i=0; i<numGroups; i++) { 994 startCharCode[i] = ibuffer.get() & INTMASK; 995 endCharCode[i] = ibuffer.get() & INTMASK; 996 startGlyphID[i] = ibuffer.get() & INTMASK; 997 } 998 999 /* Finds the high bit by binary searching through the bits */ 1000 int value = numGroups; 1001 1002 if (value >= 1 << 16) { 1003 value >>= 16; 1004 highBit += 16; 1005 } 1006 1007 if (value >= 1 << 8) { 1008 value >>= 8; 1009 highBit += 8; 1010 } 1011 1012 if (value >= 1 << 4) { 1013 value >>= 4; 1014 highBit += 4; 1015 } 1016 1017 if (value >= 1 << 2) { 1018 value >>= 2; 1019 highBit += 2; 1020 } 1021 1022 if (value >= 1 << 1) { 1023 value >>= 1; 1024 highBit += 1; 1025 } 1026 1027 power = 1 << highBit; 1028 extra = numGroups - power; 1029 } 1030 1031 char getGlyph(int charCode) { 1032 final int origCharCode = charCode; 1033 int controlGlyph = getControlCodeGlyph(charCode, false); 1034 if (controlGlyph >= 0) { 1035 return (char)controlGlyph; 1036 } 1037 int probe = power; 1038 int range = 0; 1039 1040 if (startCharCode[extra] <= charCode) { 1041 range = extra; 1042 } 1043 1044 while (probe > 1) { 1045 probe >>= 1; 1046 1047 if (startCharCode[range+probe] <= charCode) { 1048 range += probe; 1049 } 1050 } 1051 1052 if (startCharCode[range] <= charCode && 1053 endCharCode[range] >= charCode) { 1054 return (char) 1055 (startGlyphID[range] + (charCode - startCharCode[range])); 1056 } 1057 1058 return getFormatCharGlyph(origCharCode); 1059 } 1060 1061 } 1062 1063 /* Used to substitute for bad Cmaps. */ 1064 static class NullCMapClass extends CMap { 1065 1066 char getGlyph(int charCode) { 1067 return 0; 1068 } 1069 } 1070 1071 public static final NullCMapClass theNullCmap = new NullCMapClass(); 1072 1073 final int getControlCodeGlyph(int charCode, boolean noSurrogates) { 1074 if (charCode < 0x0010) { 1075 switch (charCode) { 1076 case 0x0009: 1077 case 0x000a: 1078 case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID; 1079 } 1080 } else if (noSurrogates && charCode >= 0xFFFF) { 1081 return 0; 1082 } 1083 return -1; 1084 } 1085 1086 final char getFormatCharGlyph(int charCode) { 1087 if (charCode >= 0x200c) { 1088 if ((charCode <= 0x200f) || 1089 (charCode >= 0x2028 && charCode <= 0x202e) || 1090 (charCode >= 0x206a && charCode <= 0x206f)) { 1091 return (char)CharToGlyphMapper.INVISIBLE_GLYPH_ID; 1092 } 1093 } 1094 return 0; 1095 } 1096 1097 static class UVS { 1098 int numSelectors; 1099 int[] selector; 1100 1101 //for Non-Default UVS Table 1102 int[] numUVSMapping; 1103 int[][] unicodeValue; 1104 char[][] glyphID; 1105 1106 UVS(ByteBuffer buffer, int offset) { 1107 buffer.position(offset+6); 1108 numSelectors = buffer.getInt() & INTMASK; 1109 // A variation selector record is one 3 byte int + two int32's 1110 // making for 11 bytes per record. 1111 if (buffer.remaining() < (11 * (long)numSelectors)) { 1112 throw new RuntimeException("Variations exceed buffer"); 1113 } 1114 selector = new int[numSelectors]; 1115 numUVSMapping = new int[numSelectors]; 1116 unicodeValue = new int[numSelectors][]; 1117 glyphID = new char[numSelectors][]; 1118 1119 for (int i = 0; i < numSelectors; i++) { 1120 buffer.position(offset + 10 + i * 11); 1121 selector[i] = (buffer.get() & 0xff) << 16; //UINT24 1122 selector[i] += (buffer.get() & 0xff) << 8; 1123 selector[i] += buffer.get() & 0xff; 1124 1125 //skip Default UVS Table 1126 1127 //for Non-Default UVS Table 1128 int tableOffset = buffer.getInt(offset + 10 + i * 11 + 7); 1129 if (tableOffset == 0) { 1130 numUVSMapping[i] = 0; 1131 } else if (tableOffset > 0) { 1132 buffer.position(offset+tableOffset); 1133 numUVSMapping[i] = buffer.getInt() & INTMASK; 1134 // a UVS mapping record is one 3 byte int + uint16 1135 // making for 5 bytes per record. 1136 if (buffer.remaining() < (5 * (long)numUVSMapping[i])) { 1137 throw new RuntimeException("Variations exceed buffer"); 1138 } 1139 unicodeValue[i] = new int[numUVSMapping[i]]; 1140 glyphID[i] = new char[numUVSMapping[i]]; 1141 1142 for (int j = 0; j < numUVSMapping[i]; j++) { 1143 int temp = (buffer.get() & 0xff) << 16; //UINT24 1144 temp += (buffer.get() & 0xff) << 8; 1145 temp += buffer.get() & 0xff; 1146 unicodeValue[i][j] = temp; 1147 glyphID[i][j] = buffer.getChar(); 1148 } 1149 } 1150 } 1151 } 1152 1153 static final int VS_NOGLYPH = 0; 1154 private int getGlyph(int charCode, int variationSelector) { 1155 int targetSelector = -1; 1156 for (int i = 0; i < numSelectors; i++) { 1157 if (selector[i] == variationSelector) { 1158 targetSelector = i; 1159 break; 1160 } 1161 } 1162 if (targetSelector == -1) { 1163 return VS_NOGLYPH; 1164 } 1165 if (numUVSMapping[targetSelector] > 0) { 1166 int index = java.util.Arrays.binarySearch( 1167 unicodeValue[targetSelector], charCode); 1168 if (index >= 0) { 1169 return glyphID[targetSelector][index]; 1170 } 1171 } 1172 return VS_NOGLYPH; 1173 } 1174 } 1175 1176 char getVariationGlyph(int charCode, int variationSelector) { 1177 char glyph = 0; 1178 if (uvs == null) { 1179 glyph = getGlyph(charCode); 1180 } else { 1181 int result = uvs.getGlyph(charCode, variationSelector); 1182 if (result > 0) { 1183 glyph = (char)(result & 0xFFFF); 1184 } else { 1185 glyph = getGlyph(charCode); 1186 } 1187 } 1188 return glyph; 1189 } 1190 }