1 /*
2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
80 }
81
82 // A heuristic algorithm for guessing if EUC-decoded text really
83 // might be Japanese text. Better heuristics are possible...
84 private static boolean looksLikeJapanese(CharBuffer cb) {
85 int hiragana = 0; // Fullwidth Hiragana
86 int katakana = 0; // Halfwidth Katakana
87 while (cb.hasRemaining()) {
88 char c = cb.get();
89 if (0x3040 <= c && c <= 0x309f && ++hiragana > 1) return true;
90 if (0xff65 <= c && c <= 0xff9f && ++katakana > 1) return true;
91 }
92 return false;
93 }
94
95 private static class Decoder extends CharsetDecoder {
96 private final static String osName = AccessController.doPrivileged(
97 (PrivilegedAction<String>) () -> System.getProperty("os.name"));
98
99 private final static String SJISName = getSJISName();
100 private final static String EUCJPName = getEUCJPName();
101 private DelegatableDecoder detectedDecoder = null;
102
/**
 * Creates a decoder for the given charset.
 *
 * @param cs the charset handed to the {@code CharsetDecoder} superclass
 */
public Decoder(Charset cs) {
    super(
        cs,
        0.5f,   // averageCharsPerByte
        1.0f);  // maxCharsPerByte
}
106
107 private static boolean isPlainASCII(byte b) {
108 return b >= 0 && b != 0x1b;
109 }
110
111 private static void copyLeadingASCII(ByteBuffer src, CharBuffer dst) {
112 int start = src.position();
113 int limit = start + Math.min(src.remaining(), dst.remaining());
114 int p;
115 byte b;
116 for (p = start; p < limit && isPlainASCII(b = src.get(p)); p++)
117 dst.put((char)(b & 0xff));
118 src.position(p);
119 }
120
208
209 public boolean isAutoDetecting() {
210 return true;
211 }
212
213 public boolean isCharsetDetected() {
214 return detectedDecoder != null;
215 }
216
217 public Charset detectedCharset() {
218 if (detectedDecoder == null)
219 throw new IllegalStateException("charset not yet detected");
220 return ((CharsetDecoder) detectedDecoder).charset();
221 }
222
223
224 /**
225 * Returned Shift_JIS Charset name is OS dependent
226 */
227 private static String getSJISName() {
228 if (osName.equals("Solaris") || osName.equals("SunOS"))
229 return("PCK");
230 else if (osName.startsWith("Windows"))
231 return("windows-31J");
232 else
233 return("Shift_JIS");
234 }
235
236 /**
237 * Returned EUC-JP Charset name is OS dependent
238 */
239
240 private static String getEUCJPName() {
241 if (osName.equals("Solaris") || osName.equals("SunOS"))
242 return("x-eucjp-open");
243 else
244 return("EUC_JP");
245 }
246
247 }
248 }
|
1 /*
2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
80 }
81
82 // A heuristic algorithm for guessing if EUC-decoded text really
83 // might be Japanese text. Better heuristics are possible...
84 private static boolean looksLikeJapanese(CharBuffer cb) {
85 int hiragana = 0; // Fullwidth Hiragana
86 int katakana = 0; // Halfwidth Katakana
87 while (cb.hasRemaining()) {
88 char c = cb.get();
89 if (0x3040 <= c && c <= 0x309f && ++hiragana > 1) return true;
90 if (0xff65 <= c && c <= 0xff9f && ++katakana > 1) return true;
91 }
92 return false;
93 }
94
95 private static class Decoder extends CharsetDecoder {
96 private final static String osName = AccessController.doPrivileged(
97 (PrivilegedAction<String>) () -> System.getProperty("os.name"));
98
99 private final static String SJISName = getSJISName();
100 private final static String EUCJPName = "EUC_JP";
101 private DelegatableDecoder detectedDecoder = null;
102
/**
 * Creates a decoder for the given charset.
 *
 * @param cs the charset handed to the {@code CharsetDecoder} superclass
 */
public Decoder(Charset cs) {
    super(
        cs,
        0.5f,   // averageCharsPerByte
        1.0f);  // maxCharsPerByte
}
106
107 private static boolean isPlainASCII(byte b) {
108 return b >= 0 && b != 0x1b;
109 }
110
111 private static void copyLeadingASCII(ByteBuffer src, CharBuffer dst) {
112 int start = src.position();
113 int limit = start + Math.min(src.remaining(), dst.remaining());
114 int p;
115 byte b;
116 for (p = start; p < limit && isPlainASCII(b = src.get(p)); p++)
117 dst.put((char)(b & 0xff));
118 src.position(p);
119 }
120
208
209 public boolean isAutoDetecting() {
210 return true;
211 }
212
213 public boolean isCharsetDetected() {
214 return detectedDecoder != null;
215 }
216
217 public Charset detectedCharset() {
218 if (detectedDecoder == null)
219 throw new IllegalStateException("charset not yet detected");
220 return ((CharsetDecoder) detectedDecoder).charset();
221 }
222
223
224 /**
225 * Returned Shift_JIS Charset name is OS dependent
226 */
227 private static String getSJISName() {
228 if (osName.startsWith("Windows"))
229 return("windows-31J");
230 else
231 return("Shift_JIS");
232 }
233
234 }
235 }
|