/*
 * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs.ext;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Arrays;
import sun.nio.cs.CharsetMapping;
import sun.nio.cs.*;

/*
 *  5 types of entry in SJIS_X_0213/Unicode mapping table
 *
 *  (1)Single-Byte
 *     JIS_X_0213 does not define single-byte character itself, the
 *     JIS_X_0201 entries are added in for sjis implementation.
 *
 *  (2)Double-Byte SJIS <-> BMP Unicode
 *     ex: 0x8140 U+3000    # IDEOGRAPHIC SPACE
 *
 *  (3)Double-Byte SJIS <-> Supplementary
 *     ex: 0xFCF0 U+2A61A   # <cjk> [2000] [Unicode3.1]
 *
 *  (4)Double-Byte SJIS <-> Composite
 *     ex: 0x83F6 U+31F7+309A   # [2000]
 *
 *  (5)"Windows-only" special mapping entries
 *     are handled by MS932_0213.
 */

/**
 * Charset registered under the canonical name {@code x-SJIS_0213}, with the
 * aliases that {@code ExtendedCharsets} lists for {@code "SJIS_0213"}.
 *
 * <p>All conversions are delegated to a shared {@link CharsetMapping} table
 * loaded from the {@code sjis0213.dat} resource.
 */
public class SJIS_0213 extends Charset {
    public SJIS_0213() {
        super("x-SJIS_0213", ExtendedCharsets.aliasesFor("SJIS_0213"));
    }

    /**
     * Reports whether this charset contains {@code cs}.
     *
     * @param cs the charset to test
     * @return {@code true} if {@code cs} is named {@code US-ASCII} or is an
     *         instance of {@code SJIS} or {@code SJIS_0213}
     */
    @Override
    public boolean contains(Charset cs) {
        return ((cs.name().equals("US-ASCII"))
                || (cs instanceof SJIS)
                || (cs instanceof SJIS_0213));
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    // Mapping table shared by every decoder and encoder of this charset; it
    // is read once, inside a privileged action, from the "sjis0213.dat"
    // resource located relative to this class.
    static CharsetMapping mapping = AccessController.doPrivileged(
        new PrivilegedAction<CharsetMapping>() {
            @Override
            public CharsetMapping run() {
                return CharsetMapping.get(SJIS_0213.class.getResourceAsStream("sjis0213.dat"));
            }
        });

    /**
     * Decoder that turns SJIS_0213 bytes into UTF-16 chars. A byte sequence
     * of one or two bytes produces either one char or, for the entries
     * resolved by {@link #decodeDoubleEx(int, int)}, two chars.
     */
    protected static class Decoder extends CharsetDecoder {
        /** Value returned by the single/double lookups when no mapping exists. */
        protected static final char UNMAPPABLE = CharsetMapping.UNMAPPABLE_DECODING;

        protected Decoder(Charset cs) {
            // averageCharsPerByte = 0.5, maxCharsPerByte = 1.0
            super(cs, 0.5f, 1.0f);
        }

        /**
         * Decoding loop used when both buffers are array-backed; it works on
         * the backing arrays directly and writes the final positions back
         * into the buffers on every exit path.
         */
        private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    char c = decodeSingle(b1);
                    int inSize = 1, outSize = 1;
                    char[] pair = null;
                    if (c == UNMAPPABLE) {
                        // Not a single-byte character: a second byte is needed.
                        if (sl - sp < 2) {
                            return CoderResult.UNDERFLOW;
                        }
                        int b2 = sa[sp + 1] & 0xff;
                        c = decodeDouble(b1, b2);
                        inSize++;
                        if (c == UNMAPPABLE) {
                            // Last resort: two-char (surrogate/composite) entries.
                            pair = decodeDoubleEx(b1, b2);
                            if (pair == null) {
                                // Report both bytes only when the second one is
                                // not a single-byte character by itself;
                                // otherwise only the first byte is flagged.
                                if (decodeSingle(b2) == UNMAPPABLE) {
                                    return CoderResult.unmappableForLength(2);
                                } else {
                                    return CoderResult.unmappableForLength(1);
                                }
                            }
                            outSize++;
                        }
                    }
                    if (dl - dp < outSize) {
                        return CoderResult.OVERFLOW;
                    }
                    if (outSize == 2) {
                        da[dp++] = pair[0];
                        da[dp++] = pair[1];
                    } else {
                        da[dp++] = c;
                    }
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Decoding loop used when at least one buffer is not array-backed.
         * {@code mark} tracks the position just past the last fully consumed
         * input sequence; the source is rewound to it on every exit path.
         */
        private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char[] pair = null;
                    int b1 = src.get() & 0xff;
                    char c = decodeSingle(b1);
                    int inSize = 1, outSize = 1;
                    if (c == UNMAPPABLE) {
                        // Not a single-byte character: a second byte is needed.
                        if (src.remaining() < 1) {
                            return CoderResult.UNDERFLOW;
                        }
                        int b2 = src.get() & 0xff;
                        inSize++;
                        c = decodeDouble(b1, b2);
                        if (c == UNMAPPABLE) {
                            // Last resort: two-char (surrogate/composite) entries.
                            pair = decodeDoubleEx(b1, b2);
                            if (pair == null) {
                                // Same length rule as in decodeArrayLoop.
                                if (decodeSingle(b2) == UNMAPPABLE) {
                                    return CoderResult.unmappableForLength(2);
                                } else {
                                    return CoderResult.unmappableForLength(1);
                                }
                            }
                            outSize++;
                        }
                    }
                    if (dst.remaining() < outSize) {
                        return CoderResult.OVERFLOW;
                    }
                    if (outSize == 2) {
                        dst.put(pair[0]);
                        dst.put(pair[1]);
                    } else {
                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Dispatches to the array-based loop when both buffers expose a
         * backing array, and to the buffer-based loop otherwise.
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return decodeArrayLoop(src, dst);
            } else {
                return decodeBufferLoop(src, dst);
            }
        }

        /** Looks up a single byte in the shared mapping table. */
        protected char decodeSingle(int b) {
            return mapping.decodeSingle(b);
        }

        /** Looks up a two-byte sequence in the shared mapping table. */
        protected char decodeDouble(int b1, int b2) {
            return mapping.decodeDouble(b1, b2);
        }

        // Scratch state reused by decodeDoubleEx on every call.
        private char[] cc = new char[2];
        private CharsetMapping.Entry comp = new CharsetMapping.Entry();

        /**
         * Resolves a two-byte sequence that maps to two chars, trying the
         * surrogate lookup first and the composite lookup second.
         *
         * @param b1 first byte (0..255)
         * @param b2 second byte (0..255)
         * @return this decoder's two-char scratch array holding the result
         *         (overwritten by the next call), or {@code null} when
         *         neither lookup succeeds
         */
        protected char[] decodeDoubleEx(int b1, int b2) {
            int db = (b1 << 8) | b2;
            if (mapping.decodeSurrogate(db, cc) != null) {
                return cc;
            }
            comp.bs = db;
            if (mapping.decodeComposite(comp, cc) != null) {
                return cc;
            }
            return null;
        }
    }

    /**
     * Encoder that turns UTF-16 chars into SJIS_0213 bytes. It is stateful:
     * a char that may start a base+combining composite is held back in
     * {@code leftoverBase} until the following char (or the flush) decides
     * how it is encoded.
     */
    protected static class Encoder extends CharsetEncoder {
        /** Value returned by the encode lookups when no mapping exists. */
        protected static final int UNMAPPABLE = CharsetMapping.UNMAPPABLE_ENCODING;
        /** Largest lookup result that is emitted as a single byte. */
        protected static final int MAX_SINGLEBYTE = 0xff;

        protected Encoder(Charset cs) {
            // averageBytesPerChar = 2.0, maxBytesPerChar = 2.0
            super(cs, 2.0f, 2.0f);
        }

        @Override
        public boolean canEncode(char c) {
            return (encodeChar(c) != UNMAPPABLE);
        }

        /** Looks up a single char in the shared mapping table. */
        protected int encodeChar(char ch) {
            return mapping.encodeChar(ch);
        }

        /** Looks up a surrogate pair in the shared mapping table. */
        protected int encodeSurrogate(char hi, char lo) {
            return mapping.encodeSurrogate(hi, lo);
        }

        // Scratch entry reused by encodeComposite and isCompositeBase.
        private CharsetMapping.Entry comp = new CharsetMapping.Entry();

        /** Looks up the composite formed by {@code base} followed by {@code cc}. */
        protected int encodeComposite(char base, char cc) {
            comp.cp = base;
            comp.cp2 = cc;
            return mapping.encodeComposite(comp);
        }

        /** Asks the mapping table whether {@code ch} can start a composite. */
        protected boolean isCompositeBase(char ch) {
            comp.cp = ch;
            return mapping.isCompositeBase(comp);
        }

        // Unlike surrogate pair, the base character of a base+cc composite
        // itself is a legal codepoint in 0213, if we simply return UNDERFLOW
        // when a base candidate is the last input char in the CharBuffer, like
        // what we do for the surrogate pair, encoding will fail if this base
        // character is indeed the last character of the input char sequence.
        // Keep this base candidate in "leftoverBase" so we can flush it out
        // at the end of the encoding cycle.
        char leftoverBase = 0;

        /**
         * Encoding loop used when both buffers are array-backed; it works on
         * the backing arrays directly and writes the final positions back
         * into the buffers on every exit path.
         */
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    int db;
                    char c = sa[sp];
                    if (leftoverBase != 0) {
                        // Resolve the pending base: as a composite with the
                        // current char if one exists, otherwise on its own.
                        boolean isComp = false;
                        db = encodeComposite(leftoverBase, c);
                        if (db == UNMAPPABLE) {
                            db = encodeChar(leftoverBase);
                        } else {
                            isComp = true;
                        }
                        if (dl - dp < 2) {
                            return CoderResult.OVERFLOW;
                        }
                        da[dp++] = (byte)(db >> 8);
                        da[dp++] = (byte)db;
                        leftoverBase = 0;
                        if (isComp) {
                            // The current char was consumed by the composite.
                            sp++;
                            continue;
                        }
                    }
                    if (isCompositeBase(c)) {
                        // Hold it back until the next char (or flush) is seen.
                        leftoverBase = c;
                    } else {
                        db = encodeChar(c);
                        if (db <= MAX_SINGLEBYTE) {      // SingleByte
                            if (dl <= dp) {
                                return CoderResult.OVERFLOW;
                            }
                            da[dp++] = (byte)db;
                        } else if (db != UNMAPPABLE) {   // DoubleByte
                            if (dl - dp < 2) {
                                return CoderResult.OVERFLOW;
                            }
                            da[dp++] = (byte)(db >> 8);
                            da[dp++] = (byte)db;
                        } else if (Character.isHighSurrogate(c)) {
                            if ((sp + 1) == sl) {
                                return CoderResult.UNDERFLOW;
                            }
                            char c2 = sa[sp + 1];
                            if (!Character.isLowSurrogate(c2)) {
                                return CoderResult.malformedForLength(1);
                            }
                            db = encodeSurrogate(c, c2);
                            if (db == UNMAPPABLE) {
                                return CoderResult.unmappableForLength(2);
                            }
                            if (dl - dp < 2) {
                                return CoderResult.OVERFLOW;
                            }
                            da[dp++] = (byte)(db >> 8);
                            da[dp++] = (byte)db;
                            sp++;   // the low surrogate; the high one is counted below
                        } else if (Character.isLowSurrogate(c)) {
                            return CoderResult.malformedForLength(1);
                        } else {
                            return CoderResult.unmappableForLength(1);
                        }
                    }
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Encoding loop used when at least one buffer is not array-backed.
         * {@code mark} tracks the position just past the last fully consumed
         * char; the source is rewound to it on every exit path.
         */
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int db;
                    char c = src.get();
                    if (leftoverBase != 0) {
                        // Resolve the pending base: as a composite with the
                        // current char if one exists, otherwise on its own.
                        boolean isComp = false;
                        db = encodeComposite(leftoverBase, c);
                        if (db == UNMAPPABLE) {
                            db = encodeChar(leftoverBase);
                        } else {
                            isComp = true;
                        }
                        if (dst.remaining() < 2) {
                            return CoderResult.OVERFLOW;
                        }
                        dst.put((byte)(db >> 8));
                        dst.put((byte)(db));
                        leftoverBase = 0;
                        if (isComp) {
                            // The current char was consumed by the composite.
                            mark++;
                            continue;
                        }
                    }
                    if (isCompositeBase(c)) {
                        // Hold it back until the next char (or flush) is seen.
                        leftoverBase = c;
                    } else {
                        db = encodeChar(c);
                        if (db <= MAX_SINGLEBYTE) {      // Single-byte
                            if (dst.remaining() < 1) {
                                return CoderResult.OVERFLOW;
                            }
                            dst.put((byte)db);
                        } else if (db != UNMAPPABLE) {   // DoubleByte
                            if (dst.remaining() < 2) {
                                return CoderResult.OVERFLOW;
                            }
                            dst.put((byte)(db >> 8));
                            dst.put((byte)(db));
                        } else if (Character.isHighSurrogate(c)) {
                            if (!src.hasRemaining()) {   // Surrogates
                                return CoderResult.UNDERFLOW;
                            }
                            char c2 = src.get();
                            if (!Character.isLowSurrogate(c2)) {
                                return CoderResult.malformedForLength(1);
                            }
                            db = encodeSurrogate(c, c2);
                            if (db == UNMAPPABLE) {
                                return CoderResult.unmappableForLength(2);
                            }
                            if (dst.remaining() < 2) {
                                return CoderResult.OVERFLOW;
                            }
                            dst.put((byte)(db >> 8));
                            dst.put((byte)(db));
                            mark++;   // the low surrogate; the high one is counted below
                        } else if (Character.isLowSurrogate(c)) {
                            return CoderResult.malformedForLength(1);
                        } else {
                            return CoderResult.unmappableForLength(1);
                        }
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Dispatches to the array-based loop when both buffers expose a
         * backing array, and to the buffer-based loop otherwise.
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return encodeArrayLoop(src, dst);
            } else {
                return encodeBufferLoop(src, dst);
            }
        }

        /**
         * Writes out a composite-base candidate still pending at the end of
         * the input, encoded on its own as two bytes.
         *
         * @return {@link CoderResult#OVERFLOW} if a base is pending and
         *         {@code dst} has fewer than two bytes left (the base stays
         *         pending); {@link CoderResult#UNDERFLOW} otherwise
         */
        @Override
        protected CoderResult implFlush(ByteBuffer dst) {
            if (leftoverBase > 0) {
                if (dst.remaining() < 2) {
                    return CoderResult.OVERFLOW;
                }
                int db = encodeChar(leftoverBase);
                dst.put((byte)(db >> 8));
                dst.put((byte)(db));
                leftoverBase = 0;
            }
            return CoderResult.UNDERFLOW;
        }

        /** Discards any pending composite-base candidate. */
        @Override
        protected void implReset() {
            leftoverBase = 0;
        }
    }
}