1 /*
   2  * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs.ext;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.CharsetEncoder;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CoderResult;
  34 import java.security.AccessController;
  35 import java.security.PrivilegedAction;
  36 import java.util.Arrays;
  37 import sun.nio.cs.CharsetMapping;
  38 import sun.nio.cs.*;
  39 
  40 /*
  41  *  5 types of entry in SJIS_X_0213/Unicode mapping table
  42  *
  43  *  (1)Single-Byte
  44  *     JIS_X_0213 does not define single-byte character itself, the
  45  *     JIS_X_0201 entries are added in for sjis implementation.
  46  *
  47  *  (2)Double-Byte SJIS <-> BMP Unicode
  48  *     ex: 0x8140 U+3000    # IDEOGRAPHIC SPACE
  49  *
  50  *  (3)Double-Byte SJIS <-> Supplementary
  51  *     ex: 0xFCF0 U+2A61A   # <cjk> [2000] [Unicode3.1]
  52  *
  53  *  (4)Double-Byte SJIS <-> Composite
  54  *   ex: 0x83F6 U+31F7+309A # [2000]
  55  *
  56  *  (5)"Windows-only" special mapping entries
  57  *     are handled by MS932_0213.
  58  */
  59 
  60 public class SJIS_0213 extends Charset {
  61     public SJIS_0213() {
  62         super("x-SJIS_0213", ExtendedCharsets.aliasesFor("SJIS_0213"));
  63     }
  64 
  65     public boolean contains(Charset cs) {
  66         return ((cs.name().equals("US-ASCII"))
  67                 || (cs instanceof SJIS)
  68                 || (cs instanceof SJIS_0213));
  69     }
  70 
  71     public CharsetDecoder newDecoder() {
  72         return new Decoder(this);
  73     }
  74 
  75     public CharsetEncoder newEncoder() {
  76         return new Encoder(this);
  77     }
  78 
  79     static CharsetMapping mapping = AccessController.doPrivileged(
  80         new PrivilegedAction<CharsetMapping>() {
  81             public CharsetMapping run() {
  82                 return CharsetMapping.get(SJIS_0213.class.getResourceAsStream("sjis0213.dat"));
  83             }
  84         });
  85 
  86     protected static class Decoder extends CharsetDecoder {
  87         protected static final char UNMAPPABLE = CharsetMapping.UNMAPPABLE_DECODING;
  88 
  89         protected Decoder(Charset cs) {
  90             super(cs, 0.5f, 1.0f);
  91         }
  92 
  93         private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
  94             byte[] sa = src.array();
  95             int sp = src.arrayOffset() + src.position();
  96             int sl = src.arrayOffset() + src.limit();
  97 
  98             char[] da = dst.array();
  99             int dp = dst.arrayOffset() + dst.position();
 100             int dl = dst.arrayOffset() + dst.limit();
 101 
 102             try {
 103                 while (sp < sl) {
 104                     int b1 = sa[sp] & 0xff;
 105                     char c = decodeSingle(b1);
 106                     int inSize = 1, outSize = 1;
 107                     char[] cc = null;
 108                     if (c == UNMAPPABLE) {
 109                         if (sl - sp < 2)
 110                             return CoderResult.UNDERFLOW;
 111                         int b2 = sa[sp + 1] & 0xff;
 112                         c = decodeDouble(b1, b2);
 113                         inSize++;
 114                         if (c == UNMAPPABLE) {
 115                             cc = decodeDoubleEx(b1, b2);
 116                             if (cc == null) {
 117                                 if (decodeSingle(b2) == UNMAPPABLE)
 118                                     return CoderResult.unmappableForLength(2);
 119                                 else
 120                                     return CoderResult.unmappableForLength(1);
 121                             }
 122                             outSize++;
 123                         }
 124                     }
 125                     if (dl - dp < outSize)
 126                         return CoderResult.OVERFLOW;
 127                     if (outSize == 2) {
 128                         da[dp++] = cc[0];
 129                         da[dp++] = cc[1];
 130                     } else {
 131                         da[dp++] = c;
 132                     }
 133                     sp += inSize;
 134                 }
 135                 return CoderResult.UNDERFLOW;
 136             } finally {
 137                 src.position(sp - src.arrayOffset());
 138                 dst.position(dp - dst.arrayOffset());
 139             }
 140         }
 141 
 142         private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 143             int mark = src.position();
 144             try {
 145                 while (src.hasRemaining()) {
 146                     char[] cc = null;
 147                     int b1 = src.get() & 0xff;
 148                     char c = decodeSingle(b1);
 149                     int inSize = 1, outSize = 1;
 150                     if (c == UNMAPPABLE) {
 151                         if (src.remaining() < 1)
 152                             return CoderResult.UNDERFLOW;
 153                         int b2 = src.get() & 0xff;
 154                         inSize++;
 155                         c = decodeDouble(b1, b2);
 156                         if (c == UNMAPPABLE) {
 157                             cc = decodeDoubleEx(b1, b2);
 158                             if (cc == null) {
 159                                 if (decodeSingle(b2) == UNMAPPABLE)
 160                                     return CoderResult.unmappableForLength(2);
 161                                 else
 162                                     return CoderResult.unmappableForLength(1);
 163                             }
 164                             outSize++;
 165                         }
 166                     }
 167                     if (dst.remaining() < outSize)
 168                         return CoderResult.OVERFLOW;
 169                     if (outSize == 2) {
 170                         dst.put(cc[0]);
 171                         dst.put(cc[1]);
 172                     } else {
 173                         dst.put(c);
 174                     }
 175                     mark += inSize;
 176                 }
 177                 return CoderResult.UNDERFLOW;
 178             } finally {
 179                 src.position(mark);
 180             }
 181         }
 182 
 183         protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
 184             if (src.hasArray() && dst.hasArray())
 185                 return decodeArrayLoop(src, dst);
 186             else
 187                 return decodeBufferLoop(src, dst);
 188         }
 189 
 190         protected char decodeSingle(int b) {
 191             return mapping.decodeSingle(b);
 192         }
 193 
 194         protected char decodeDouble(int b1, int b2) {
 195             return mapping.decodeDouble(b1, b2);
 196         }
 197 
 198         private char[] cc = new char[2];
 199         private CharsetMapping.Entry comp = new CharsetMapping.Entry();
 200         protected char[] decodeDoubleEx(int b1, int b2) {
 201             int db = (b1 << 8) | b2;
 202             if (mapping.decodeSurrogate(db, cc) != null)
 203                 return cc;
 204             comp.bs = db;
 205             if (mapping.decodeComposite(comp, cc) != null)
 206                 return cc;
 207             return null;
 208         }
 209     }
 210 
 211     protected static class Encoder extends CharsetEncoder {
 212         protected static final int UNMAPPABLE = CharsetMapping.UNMAPPABLE_ENCODING;
 213         protected static final int MAX_SINGLEBYTE = 0xff;
 214 
 215         protected Encoder(Charset cs) {
 216             super(cs, 2.0f, 2.0f);
 217         }
 218 
 219         public boolean canEncode(char c) {
 220             return (encodeChar(c) != UNMAPPABLE);
 221         }
 222 
 223         protected int encodeChar(char ch) {
 224             return mapping.encodeChar(ch);
 225         }
 226 
 227         protected int encodeSurrogate(char hi, char lo) {
 228             return mapping.encodeSurrogate(hi, lo);
 229         }
 230 
 231         private CharsetMapping.Entry comp = new CharsetMapping.Entry();
 232         protected int encodeComposite(char base, char cc) {
 233             comp.cp = base;
 234             comp.cp2 = cc;
 235             return mapping.encodeComposite(comp);
 236         }
 237 
 238         protected boolean isCompositeBase(char ch) {
 239             comp.cp = ch;
 240             return mapping.isCompositeBase(comp);
 241         }
 242 
 243         // Unlike surrogate pair, the base character of a base+cc composite
 244         // itself is a legal codepoint in 0213, if we simply return UNDERFLOW
 245         // when a base candidate is the last input char in the CharBuffer, like
 246         // what we do for the surrogte pair, encoding will fail if this base
 247         // character is indeed the last character of the input char sequence.
 248         // Keep this base candidate in "leftoverBase" so we can flush it out
 249         // at the end of the encoding circle.
 250         char leftoverBase = 0;
 251         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 252             char[] sa = src.array();
 253             int sp = src.arrayOffset() + src.position();
 254             int sl = src.arrayOffset() + src.limit();
 255             byte[] da = dst.array();
 256             int dp = dst.arrayOffset() + dst.position();
 257             int dl = dst.arrayOffset() + dst.limit();
 258 
 259             try {
 260                 while (sp < sl) {
 261                     int db;
 262                     char c = sa[sp];
 263                     if (leftoverBase != 0) {
 264                         boolean isComp = false;
 265                         db = encodeComposite(leftoverBase, c);
 266                         if (db == UNMAPPABLE)
 267                             db = encodeChar(leftoverBase);
 268                         else
 269                             isComp = true;
 270                         if (dl - dp < 2)
 271                             return CoderResult.OVERFLOW;
 272                         da[dp++] = (byte)(db >> 8);
 273                         da[dp++] = (byte)db;
 274                         leftoverBase = 0;
 275                         if (isComp) {
 276                             sp++;
 277                             continue;
 278                         }
 279                     }
 280                     if (isCompositeBase(c)) {
 281                         leftoverBase = c;
 282                     } else {
 283                         db = encodeChar(c);
 284                         if (db <= MAX_SINGLEBYTE) {      // SingleByte
 285                             if (dl <= dp)
 286                                 return CoderResult.OVERFLOW;
 287                             da[dp++] = (byte)db;
 288                         } else if (db != UNMAPPABLE) {   // DoubleByte
 289                             if (dl - dp < 2)
 290                                 return CoderResult.OVERFLOW;
 291                             da[dp++] = (byte)(db >> 8);
 292                             da[dp++] = (byte)db;
 293                         } else if (Character.isHighSurrogate(c)) {
 294                             if ((sp + 1) == sl)
 295                                 return CoderResult.UNDERFLOW;
 296                             char c2 = sa[sp + 1];
 297                             if (!Character.isLowSurrogate(c2))
 298                                 return CoderResult.malformedForLength(1);
 299                             db = encodeSurrogate(c, c2);
 300                             if (db == UNMAPPABLE)
 301                                 return CoderResult.unmappableForLength(2);
 302                             if (dl - dp < 2)
 303                                 return CoderResult.OVERFLOW;
 304                             da[dp++] = (byte)(db >> 8);
 305                             da[dp++] = (byte)db;
 306                             sp++;
 307                         } else if (Character.isLowSurrogate(c)) {
 308                             return CoderResult.malformedForLength(1);
 309                         } else {
 310                             return CoderResult.unmappableForLength(1);
 311                         }
 312                     }
 313                     sp++;
 314                 }
 315                 return CoderResult.UNDERFLOW;
 316             } finally {
 317                 src.position(sp - src.arrayOffset());
 318                 dst.position(dp - dst.arrayOffset());
 319             }
 320         }
 321 
 322         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 323             int mark = src.position();
 324             try {
 325                 while (src.hasRemaining()) {
 326                     int db;
 327                     char c = src.get();
 328                     if (leftoverBase != 0) {
 329                         boolean isComp = false;
 330                         db = encodeComposite(leftoverBase, c);
 331                         if (db == UNMAPPABLE)
 332                             db = encodeChar(leftoverBase);
 333                         else
 334                             isComp = true;
 335                         if (dst.remaining() < 2)
 336                             return CoderResult.OVERFLOW;
 337                         dst.put((byte)(db >> 8));
 338                         dst.put((byte)(db));
 339                         leftoverBase = 0;
 340                         if (isComp) {
 341                             mark++;
 342                             continue;
 343                         }
 344                     }
 345                     if (isCompositeBase(c)) {
 346                         leftoverBase = c;
 347                     } else {
 348                         db = encodeChar(c);
 349                         if (db <= MAX_SINGLEBYTE) {    // Single-byte
 350                             if (dst.remaining() < 1)
 351                                 return CoderResult.OVERFLOW;
 352                             dst.put((byte)db);
 353                         } else if (db != UNMAPPABLE) {   // DoubleByte
 354                             if (dst.remaining() < 2)
 355                                 return CoderResult.OVERFLOW;
 356                             dst.put((byte)(db >> 8));
 357                             dst.put((byte)(db));
 358                         } else if (Character.isHighSurrogate(c)) {
 359                             if (!src.hasRemaining())     // Surrogates
 360                                 return CoderResult.UNDERFLOW;
 361                             char c2 = src.get();
 362                             if (!Character.isLowSurrogate(c2))
 363                                 return CoderResult.malformedForLength(1);
 364                             db = encodeSurrogate(c, c2);
 365                             if (db == UNMAPPABLE)
 366                                 return CoderResult.unmappableForLength(2);
 367                             if (dst.remaining() < 2)
 368                                 return CoderResult.OVERFLOW;
 369                             dst.put((byte)(db >> 8));
 370                             dst.put((byte)(db));
 371                             mark++;
 372                         } else if (Character.isLowSurrogate(c)) {
 373                             return CoderResult.malformedForLength(1);
 374                         } else {
 375                             return CoderResult.unmappableForLength(1);
 376                         }
 377                     }
 378                     mark++;
 379                 }
 380                 return CoderResult.UNDERFLOW;
 381             } finally {
 382                 src.position(mark);
 383             }
 384         }
 385 
 386         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
 387             if (src.hasArray() && dst.hasArray())
 388                 return encodeArrayLoop(src, dst);
 389             else
 390                 return encodeBufferLoop(src, dst);
 391         }
 392 
 393         protected CoderResult implFlush(ByteBuffer dst) {
 394             if (leftoverBase > 0) {
 395                 if (dst.remaining() < 2)
 396                     return CoderResult.OVERFLOW;
 397                 int db = encodeChar(leftoverBase);
 398                 dst.put((byte)(db >> 8));
 399                 dst.put((byte)(db));
 400                 leftoverBase = 0;
 401             }
 402             return CoderResult.UNDERFLOW;
 403         }
 404 
 405         protected void implReset() {
 406             leftoverBase = 0;
 407         }
 408     }
 409 }