1 /*
2 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
328 * <a href="DataInput.html#modified-utf-8">modified UTF-8</a>
329 * encoding in a machine-independent manner.
330 * <p>
331 * First, two bytes are written to out as if by the <code>writeShort</code>
332 * method giving the number of bytes to follow. This value is the number of
333 * bytes actually written out, not the length of the string. Following the
334 * length, each character of the string is output, in sequence, using the
335 * modified UTF-8 encoding for the character. If no exception is thrown, the
336 * counter <code>written</code> is incremented by the total number of
337 * bytes written to the output stream. This will be at least two
338 * plus the length of <code>str</code>, and at most two plus
339 * thrice the length of <code>str</code>.
340 *
341 * @param str a string to be written.
342 * @param out destination to write to
343 * @return The number of bytes written out.
344 * @exception IOException if an I/O error occurs.
345 */
346 static int writeUTF(String str, DataOutput out) throws IOException {
347 int strlen = str.length();
348 int utflen = 0;
349 int c, count = 0;
350
351 /* use charAt instead of copying String to char array */
352 for (int i = 0; i < strlen; i++) {
353 c = str.charAt(i);
354 if ((c >= 0x0001) && (c <= 0x007F)) {
355 utflen++;
356 } else if (c > 0x07FF) {
357 utflen += 3;
358 } else {
359 utflen += 2;
360 }
361 }
362
363 if (utflen > 65535)
364 throw new UTFDataFormatException(
365 "encoded string too long: " + utflen + " bytes");
366
367 byte[] bytearr = null;
368 if (out instanceof DataOutputStream) {
369 DataOutputStream dos = (DataOutputStream)out;
370 if(dos.bytearr == null || (dos.bytearr.length < (utflen+2)))
371 dos.bytearr = new byte[(utflen*2) + 2];
372 bytearr = dos.bytearr;
373 } else {
374 bytearr = new byte[utflen+2];
375 }
376
377 bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
378 bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
379
380 int i=0;
381 for (i=0; i<strlen; i++) {
382 c = str.charAt(i);
383 if (!((c >= 0x0001) && (c <= 0x007F))) break;
384 bytearr[count++] = (byte) c;
385 }
386
387 for (;i < strlen; i++){
388 c = str.charAt(i);
389 if ((c >= 0x0001) && (c <= 0x007F)) {
390 bytearr[count++] = (byte) c;
391
392 } else if (c > 0x07FF) {
393 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
394 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
395 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
396 } else {
397 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
398 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
399 }
400 }
401 out.write(bytearr, 0, utflen+2);
402 return utflen + 2;
403 }
404
405 /**
406 * Returns the current value of the counter <code>written</code>,
407 * the number of bytes written to this data output stream so far.
408 * If the counter overflows, it will be wrapped to Integer.MAX_VALUE.
|
1 /*
2 * Copyright (c) 1994, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
328 * <a href="DataInput.html#modified-utf-8">modified UTF-8</a>
329 * encoding in a machine-independent manner.
330 * <p>
331 * First, two bytes are written to out as if by the <code>writeShort</code>
332 * method giving the number of bytes to follow. This value is the number of
333 * bytes actually written out, not the length of the string. Following the
334 * length, each character of the string is output, in sequence, using the
335 * modified UTF-8 encoding for the character. If no exception is thrown, the
336 * counter <code>written</code> is incremented by the total number of
337 * bytes written to the output stream. This will be at least two
338 * plus the length of <code>str</code>, and at most two plus
339 * thrice the length of <code>str</code>.
340 *
341 * @param str a string to be written.
342 * @param out destination to write to
343 * @return The number of bytes written out.
344 * @exception IOException if an I/O error occurs.
345 */
346 static int writeUTF(String str, DataOutput out) throws IOException {
347 int strlen = str.length();
348
349 // use charAt instead of copying String to char array
350 int utflen = 0;
351 for (int i = 0; i < strlen && utflen < 65536; i++) {
352 int c = str.charAt(i);
353 if ((c >= 0x0001) && (c <= 0x007F)) {
354 utflen++;
355 } else if (c > 0x07FF) {
356 utflen += 3;
357 } else {
358 utflen += 2;
359 }
360 }
361
362 if (utflen > 65535)
363 throw new UTFDataFormatException("encoded string too long");
364
365 byte[] bytearr = null;
366 if (out instanceof DataOutputStream) {
367 DataOutputStream dos = (DataOutputStream)out;
368 if(dos.bytearr == null || (dos.bytearr.length < (utflen+2)))
369 dos.bytearr = new byte[(utflen*2) + 2];
370 bytearr = dos.bytearr;
371 } else {
372 bytearr = new byte[utflen+2];
373 }
374
375 int count = 0;
376 bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
377 bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
378
379 int i=0;
380 for (i=0; i<strlen; i++) {
381 int c = str.charAt(i);
382 if (!((c >= 0x0001) && (c <= 0x007F))) break;
383 bytearr[count++] = (byte) c;
384 }
385
386 for (;i < strlen; i++){
387 int c = str.charAt(i);
388 if ((c >= 0x0001) && (c <= 0x007F)) {
389 bytearr[count++] = (byte) c;
390
391 } else if (c > 0x07FF) {
392 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
393 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
394 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
395 } else {
396 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
397 bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
398 }
399 }
400 out.write(bytearr, 0, utflen+2);
401 return utflen + 2;
402 }
403
404 /**
405 * Returns the current value of the counter <code>written</code>,
406 * the number of bytes written to this data output stream so far.
407 * If the counter overflows, it will be wrapped to Integer.MAX_VALUE.
|