1 /* 2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.CharBuffer; 29 import java.nio.charset.CoderResult; 30 import java.nio.charset.MalformedInputException; 31 import java.nio.charset.UnmappableCharacterException; 32 33 /** 34 * Utility class for dealing with surrogates. 
/**
 * Static helpers, plus two small stateful helper classes, for working with
 * UTF-16 surrogate code units and the UCS-4 characters they encode.
 *
 * @author Mark Reinhold
 * @author Martin Buchholz
 * @author Ulf Zibis
 */
public class Surrogate {

    // Only static members and nested classes live here; no instances.
    private Surrogate() { }

    // TODO: Deprecate/remove the following redundant definitions
    public static final char MIN_HIGH = Character.MIN_HIGH_SURROGATE;
    public static final char MAX_HIGH = Character.MAX_HIGH_SURROGATE;
    public static final char MIN_LOW  = Character.MIN_LOW_SURROGATE;
    public static final char MAX_LOW  = Character.MAX_LOW_SURROGATE;
    public static final char MIN      = Character.MIN_SURROGATE;
    public static final char MAX      = Character.MAX_SURROGATE;
    public static final int  UCS4_MIN = Character.MIN_SUPPLEMENTARY_CODE_POINT;
    public static final int  UCS4_MAX = Character.MAX_CODE_POINT;

    /**
     * Reports whether the given value lies between {@link #MIN_HIGH} and
     * {@link #MAX_HIGH}, inclusive.
     * Use of {@link Character#isHighSurrogate} is generally preferred.
     */
    public static boolean isHigh(int c) {
        return c >= MIN_HIGH && c <= MAX_HIGH;
    }

    /**
     * Reports whether the given value lies between {@link #MIN_LOW} and
     * {@link #MAX_LOW}, inclusive.
     * Use of {@link Character#isLowSurrogate} is generally preferred.
     */
    public static boolean isLow(int c) {
        return c >= MIN_LOW && c <= MAX_LOW;
    }

    /**
     * Reports whether the given value lies between {@link #MIN} and
     * {@link #MAX}, inclusive, i.e. anywhere in the surrogate range.
     * Use of {@link Character#isSurrogate} is generally preferred.
     */
    public static boolean is(int c) {
        return c >= MIN && c <= MAX;
    }

    /**
     * Reports whether the given UCS-4 character needs a surrogate pair when
     * written as UTF-16.
     * Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
     */
    public static boolean neededFor(int uc) {
        return Character.isSupplementaryCodePoint(uc);
    }

    /**
     * Computes the high (leading) UTF-16 surrogate of a supplementary UCS-4
     * character. The argument is checked by assertion only.
     * Use of {@link Character#highSurrogate} is generally preferred.
     */
    public static char high(int uc) {
        assert Character.isSupplementaryCodePoint(uc);
        return Character.highSurrogate(uc);
    }

    /**
     * Computes the low (trailing) UTF-16 surrogate of a supplementary UCS-4
     * character. The argument is checked by assertion only.
     * Use of {@link Character#lowSurrogate} is generally preferred.
     */
    public static char low(int uc) {
        assert Character.isSupplementaryCodePoint(uc);
        return Character.lowSurrogate(uc);
    }

    /**
     * Combines a high/low surrogate pair into the 32-bit UCS-4 character it
     * encodes. The arguments are checked by assertion only.
     * Use of {@link Character#toCodePoint} is generally preferred.
     */
    public static int toUCS4(char c, char d) {
        assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
        return Character.toCodePoint(c, d);
    }

    /**
     * Surrogate parsing support. Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p> An instance remembers the outcome of its most recent parse: either
     * a character (with {@code error} cleared) or an error result. A failed
     * parse leaves the previously stored character and pair flag untouched.
     */
    public static class Parser {

        private int character;          // UCS-4
        private boolean isPair;
        private CoderResult error = CoderResult.UNDERFLOW;

        public Parser() { }

        /**
         * Returns the UCS-4 character produced by the last parse, which must
         * have succeeded (checked by assertion).
         */
        public int character() {
            assert (error == null);
            return character;
        }

        /**
         * Tells whether the character produced by the last parse came from a
         * surrogate pair. The last parse must have succeeded (checked by
         * assertion).
         */
        public boolean isPair() {
            assert (error == null);
            return isPair;
        }

        /**
         * Returns how many UTF-16 characters the last parse consumed: 2 for a
         * pair, otherwise 1. The last parse must have succeeded (checked by
         * assertion).
         */
        public int increment() {
            assert (error == null);
            if (isPair) {
                return 2;
            }
            return 1;
        }

        /**
         * Returns the result object describing the error found by the last
         * parse. An error must be present (checked by assertion).
         */
        public CoderResult error() {
            assert (error != null);
            return error;
        }

        /**
         * Builds an unmappable-input result whose length matches the number
         * of UTF-16 characters consumed by the last parse. The last parse
         * must have succeeded (checked by assertion).
         */
        public CoderResult unmappableResult() {
            assert (error == null);
            final int consumed = isPair ? 2 : 1;
            return CoderResult.unmappableForLength(consumed);
        }

        /**
         * Parses a UCS-4 character, reading the second half of a surrogate
         * pair from the given buffer when necessary.
         *
         * @param  c   The first character
         * @param  in  The source buffer, from which one more character
         *             will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (!Character.isSurrogate(c)) {
                return accept(c, false);
            }
            if (Character.isLowSurrogate(c)) {
                // A low surrogate with no preceding high surrogate.
                return reject(CoderResult.malformedForLength(1));
            }
            // c is a high surrogate from here on.
            if (!in.hasRemaining()) {
                return reject(CoderResult.UNDERFLOW);
            }
            // The next character is read from the buffer before it is
            // validated, so it has been consumed even if the pair is rejected.
            return combine(c, in.get());
        }

        /**
         * Parses a UCS-4 character, reading the second half of a surrogate
         * pair from the given array when necessary.
         *
         * @param  c   The first character; must equal {@code ia[ip]}
         *             (checked by assertion)
         * @param  ia  The input array, from which one more character
         *             will be consumed if c is a high surrogate
         * @param  ip  The input index
         * @param  il  The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            assert (ia[ip] == c);
            if (!Character.isSurrogate(c)) {
                return accept(c, false);
            }
            if (Character.isLowSurrogate(c)) {
                // A low surrogate with no preceding high surrogate.
                return reject(CoderResult.malformedForLength(1));
            }
            // c is a high surrogate from here on.
            if (il - ip < 2) {
                return reject(CoderResult.UNDERFLOW);
            }
            return combine(c, ia[ip + 1]);
        }

        // Joins a high surrogate with the character that follows it, or
        // records a malformed-input error of length 1 if that character is
        // not a low surrogate.
        private int combine(char lead, char trail) {
            if (!Character.isLowSurrogate(trail)) {
                return reject(CoderResult.malformedForLength(1));
            }
            return accept(Character.toCodePoint(lead, trail), true);
        }

        // Records a successful parse and returns the parsed character.
        private int accept(int codePoint, boolean pair) {
            character = codePoint;
            isPair = pair;
            error = null;
            return codePoint;
        }

        // Records a failed parse and returns the -1 failure marker.
        private int reject(CoderResult reason) {
            error = reason;
            return -1;
        }

    }

    /**
     * Surrogate generation support. Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     *
     * <p> An instance remembers whether its most recent generation succeeded
     * ({@code error} cleared) or which error result it produced.
     */
    public static class Generator {

        private CoderResult error = CoderResult.OVERFLOW;

        public Generator() { }

        /**
         * Returns the result object describing the error found by the last
         * generation operation. An error must be present (checked by
         * assertion).
         */
        public CoderResult error() {
            assert error != null;
            return error;
        }

        /**
         * Writes the one or two UTF-16 characters that represent the given
         * UCS-4 character into a buffer.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            if (!Character.isValidCodePoint(uc)) {
                return fail(CoderResult.unmappableForLength(len));
            }
            if (!Character.isBmpCodePoint(uc)) {
                // Supplementary character: needs room for a full pair.
                if (dst.remaining() < 2) {
                    return fail(CoderResult.OVERFLOW);
                }
                dst.put(Character.highSurrogate(uc));
                dst.put(Character.lowSurrogate(uc));
                return done(2);
            }
            final char unit = (char) uc;
            // A value inside the surrogate range is reported as malformed;
            // this check comes before the space check.
            if (Character.isSurrogate(unit)) {
                return fail(CoderResult.malformedForLength(len));
            }
            if (!dst.hasRemaining()) {
                return fail(CoderResult.OVERFLOW);
            }
            dst.put(unit);
            return done(1);
        }

        /**
         * Writes the one or two UTF-16 characters that represent the given
         * UCS-4 character into an array.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            if (!Character.isValidCodePoint(uc)) {
                return fail(CoderResult.unmappableForLength(len));
            }
            if (!Character.isBmpCodePoint(uc)) {
                // Supplementary character: needs room for a full pair.
                if (dl - dp < 2) {
                    return fail(CoderResult.OVERFLOW);
                }
                da[dp] = Character.highSurrogate(uc);
                da[dp + 1] = Character.lowSurrogate(uc);
                return done(2);
            }
            final char unit = (char) uc;
            // A value inside the surrogate range is reported as malformed;
            // this check comes before the space check.
            if (Character.isSurrogate(unit)) {
                return fail(CoderResult.malformedForLength(len));
            }
            if (dl - dp < 1) {
                return fail(CoderResult.OVERFLOW);
            }
            da[dp] = unit;
            return done(1);
        }

        // Records a successful generation and returns the count written.
        private int done(int written) {
            error = null;
            return written;
        }

        // Records a failed generation and returns the -1 failure marker.
        private int fail(CoderResult reason) {
            error = reason;
            return -1;
        }
    }

}