1 /*
   2  * Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.io;
  27 
  28 /**
  29  * The {@code DataInput} interface provides
  30  * for reading bytes from a binary stream and
  31  * reconstructing from them data in any of
  32  * the Java primitive types. There is also
  33  * a
  34  * facility for reconstructing a {@code String}
  35  * from data in
  36  * <a href="#modified-utf-8">modified UTF-8</a>
  37  * format.
  38  * <p>
  39  * It is generally true of all the reading
  40  * routines in this interface that if end of
  41  * file is reached before the desired number
  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
  52  * <p>
  53  * Implementations of the {@code DataInput} and {@code DataOutput} interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>.)
  58  *
  59  * <ul>
  60  * <li>Characters in the range {@code '\u005Cu0001'} to
  61  *         {@code '\u005Cu007F'} are represented by a single byte.
  62  * <li>The null character {@code '\u005Cu0000'} and characters
  63  *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  64  *         represented by a pair of bytes.
  65  * <li>Characters in the range {@code '\u005Cu0800'}
  66  *         to {@code '\u005CuFFFF'} are represented by three bytes.
  67  * </ul>
  68  *
  69  *   <table class="plain" style="margin-left:2em;">
  70  *     <caption>Encoding of UTF-8 values</caption>
  71  *     <thead>
  72  *     <tr>
  73  *       <th scope="col" rowspan="2">Value</th>
  74  *       <th scope="col" rowspan="2">Byte</th>
  75  *       <th scope="col" colspan="8" id="bit_a">Bit Values</th>
  76  *     </tr>
  77  *     <tr>
  78  *       <!-- Value -->
  79  *       <!-- Byte -->
  80  *       <th scope="col" style="width:3em"> 7 </th>
  81  *       <th scope="col" style="width:3em"> 6 </th>
  82  *       <th scope="col" style="width:3em"> 5 </th>
  83  *       <th scope="col" style="width:3em"> 4 </th>
  84  *       <th scope="col" style="width:3em"> 3 </th>
  85  *       <th scope="col" style="width:3em"> 2 </th>
  86  *       <th scope="col" style="width:3em"> 1 </th>
  87  *       <th scope="col" style="width:3em"> 0 </th>
  88  *     </tr></thead>
  89  *     <tbody>
  90  *     <tr>
  91  *       <th scope="row" style="text-align:left; font-weight:normal">
  92  *         {@code \u005Cu0001} to {@code \u005Cu007F} </th>
  93  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
  94  *       <td style="text-align:center">0
  95  *       <td colspan="7" style="text-align:right; padding-right:6em">bits 6-0
  96  *     </tr>
  97  *     <tr>
  98  *       <th scope="row" rowspan="2" style="text-align:left; font-weight:normal">
  99  *           {@code \u005Cu0000},<br>
 100  *           {@code \u005Cu0080} to {@code \u005Cu07FF} </th>
 101  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 102  *       <td style="text-align:center">1
 103  *       <td style="text-align:center">1
 104  *       <td style="text-align:center">0
 105  *       <td colspan="5" style="text-align:right; padding-right:6em">bits 10-6
 106  *     </tr>
 107  *     <tr>
 108  *       <!-- (value) -->
 109  *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 110  *       <td style="text-align:center">1
 111  *       <td style="text-align:center">0
 112  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 113  *     </tr>
 114  *     <tr>
 115  *       <th scope="row" rowspan="3" style="text-align:left; font-weight:normal">
 116  *         {@code \u005Cu0800} to {@code \u005CuFFFF} </th>
 117  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 118  *       <td style="text-align:center">1
 119  *       <td style="text-align:center">1
 120  *       <td style="text-align:center">1
 121  *       <td style="text-align:center">0
 122  *       <td colspan="4" style="text-align:right; padding-right:6em">bits 15-12
 123  *     </tr>
 124  *     <tr>
 125  *       <!-- (value) -->
 126  *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 127  *       <td style="text-align:center">1
 128  *       <td style="text-align:center">0
 129  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 11-6
 130  *     </tr>
 131  *     <tr>
 132  *       <!-- (value) -->
 133  *       <th scope="row" style="font-weight:normal; text-align:center"> 3 </th>
 134  *       <td style="text-align:center">1
 135  *       <td style="text-align:center">0
 136  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 137  *     </tr>
 138  *     </tbody>
 139  *   </table>
 140  *
 141  * <p>
 142  * The differences between this format and the
 143  * standard UTF-8 format are the following:
 144  * <ul>
 145  * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 146  *     rather than 1-byte, so that the encoded strings never have
 147  *     embedded nulls.
 148  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 149  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 150  *     are represented in the form of surrogate pairs.
 151  * </ul>
 152  * @author  Frank Yellin
 153  * @see     java.io.DataInputStream
 154  * @see     java.io.DataOutput
 155  * @since   1.0
 156  */
 157 public
 158 interface DataInput {
 159     /**
 160      * Reads some bytes from an input
 161      * stream and stores them into the buffer
 162      * array {@code b}. The number of bytes
 163      * read is equal
 164      * to the length of {@code b}.
 165      * <p>
 166      * This method blocks until one of the
 167      * following conditions occurs:
 168      * <ul>
 169      * <li>{@code b.length}
 170      * bytes of input data are available, in which
 171      * case a normal return is made.
 172      *
 173      * <li>End of
 174      * file is detected, in which case an {@code EOFException}
 175      * is thrown.
 176      *
 177      * <li>An I/O error occurs, in
 178      * which case an {@code IOException} other
 179      * than {@code EOFException} is thrown.
 180      * </ul>
 181      * <p>
 182      * If {@code b} is {@code null},
 183      * a {@code NullPointerException} is thrown.
 184      * If {@code b.length} is zero, then
 185      * no bytes are read. Otherwise, the first
 186      * byte read is stored into element {@code b[0]},
 187      * the next one into {@code b[1]}, and
 188      * so on.
 189      * If an exception is thrown from
 190      * this method, then it may be that some but
 191      * not all bytes of {@code b} have been
 192      * updated with data from the input stream.
 193      *
 194      * @param   b   the buffer into which the data is read.
 195      * @throws  NullPointerException if {@code b} is {@code null}.
 196      * @throws  EOFException  if this stream reaches the end before reading
 197      *          all the bytes.
 198      * @throws  IOException   if an I/O error occurs.
 199      */
 200     void readFully(byte b[]) throws IOException;
 201 
 202     /**
 203      *
 204      * Reads {@code len}
 205      * bytes from
 206      * an input stream.
 207      * <p>
 208      * This method
 209      * blocks until one of the following conditions
 210      * occurs:
 211      * <ul>
 212      * <li>{@code len} bytes
 213      * of input data are available, in which case
 214      * a normal return is made.
 215      *
 216      * <li>End of file
 217      * is detected, in which case an {@code EOFException}
 218      * is thrown.
 219      *
 220      * <li>An I/O error occurs, in
 221      * which case an {@code IOException} other
 222      * than {@code EOFException} is thrown.
 223      * </ul>
 224      * <p>
 225      * If {@code b} is {@code null},
 226      * a {@code NullPointerException} is thrown.
 227      * If {@code off} is negative, or {@code len}
 228      * is negative, or {@code off+len} is
 229      * greater than the length of the array {@code b},
 230      * then an {@code IndexOutOfBoundsException}
 231      * is thrown.
 232      * If {@code len} is zero,
 233      * then no bytes are read. Otherwise, the first
 234      * byte read is stored into element {@code b[off]},
 235      * the next one into {@code b[off+1]},
 236      * and so on. The number of bytes read is,
 237      * at most, equal to {@code len}.
 238      *
 239      * @param   b    the buffer into which the data is read.
 240      * @param   off  an int specifying the offset in the data array {@code b}.
 241      * @param   len  an int specifying the number of bytes to read.
 242      * @throws  NullPointerException if {@code b} is {@code null}.
 243      * @throws  IndexOutOfBoundsException if {@code off} is negative,
 244      *          {@code len} is negative, or {@code len} is greater than
 245      *          {@code b.length - off}.
 246      * @throws  EOFException  if this stream reaches the end before reading
 247      *          all the bytes.
 248      * @throws  IOException   if an I/O error occurs.
 249      */
 250     void readFully(byte b[], int off, int len) throws IOException;
 251 
 252     /**
 253      * Makes an attempt to skip over
 254      * {@code n} bytes
 255      * of data from the input
 256      * stream, discarding the skipped bytes. However,
 257      * it may skip
 258      * over some smaller number of
 259      * bytes, possibly zero. This may result from
 260      * any of a
 261      * number of conditions; reaching
 262      * end of file before {@code n} bytes
 263      * have been skipped is
 264      * only one possibility.
 265      * This method never throws an {@code EOFException}.
 266      * The actual
 267      * number of bytes skipped is returned.
 268      *
 269      * @param      n   the number of bytes to be skipped.
 270      * @return     the number of bytes actually skipped.
 271      * @throws     IOException   if an I/O error occurs.
 272      */
 273     int skipBytes(int n) throws IOException;
 274 
 275     /**
 276      * Reads one input byte and returns
 277      * {@code true} if that byte is nonzero,
 278      * {@code false} if that byte is zero.
 279      * This method is suitable for reading
 280      * the byte written by the {@code writeBoolean}
 281      * method of interface {@code DataOutput}.
 282      *
 283      * @return     the {@code boolean} value read.
 284      * @throws     EOFException  if this stream reaches the end before reading
 285      *               all the bytes.
 286      * @throws     IOException   if an I/O error occurs.
 287      */
 288     boolean readBoolean() throws IOException;
 289 
 290     /**
 291      * Reads and returns one input byte.
 292      * The byte is treated as a signed value in
 293      * the range {@code -128} through {@code 127},
 294      * inclusive.
 295      * This method is suitable for
 296      * reading the byte written by the {@code writeByte}
 297      * method of interface {@code DataOutput}.
 298      *
 299      * @return     the 8-bit value read.
 300      * @throws     EOFException  if this stream reaches the end before reading
 301      *               all the bytes.
 302      * @throws     IOException   if an I/O error occurs.
 303      */
 304     byte readByte() throws IOException;
 305 
 306     /**
 307      * Reads one input byte, zero-extends
 308      * it to type {@code int}, and returns
 309      * the result, which is therefore in the range
 310      * {@code 0}
 311      * through {@code 255}.
 312      * This method is suitable for reading
 313      * the byte written by the {@code writeByte}
 314      * method of interface {@code DataOutput}
 315      * if the argument to {@code writeByte}
 316      * was intended to be a value in the range
 317      * {@code 0} through {@code 255}.
 318      *
 319      * @return     the unsigned 8-bit value read.
 320      * @throws     EOFException  if this stream reaches the end before reading
 321      *               all the bytes.
 322      * @throws     IOException   if an I/O error occurs.
 323      */
 324     int readUnsignedByte() throws IOException;
 325 
 326     /**
 327      * Reads two input bytes and returns
 328      * a {@code short} value. Let {@code a}
 329      * be the first byte read and {@code b}
 330      * be the second byte. The value
 331      * returned
 332      * is:
 333      * <pre>{@code (short)((a << 8) | (b & 0xff))
 334      * }</pre>
 335      * This method
 336      * is suitable for reading the bytes written
 337      * by the {@code writeShort} method of
 338      * interface {@code DataOutput}.
 339      *
 340      * @return     the 16-bit value read.
 341      * @throws     EOFException  if this stream reaches the end before reading
 342      *               all the bytes.
 343      * @throws     IOException   if an I/O error occurs.
 344      */
 345     short readShort() throws IOException;
 346 
 347     /**
 348      * Reads two input bytes and returns
 349      * an {@code int} value in the range {@code 0}
 350      * through {@code 65535}. Let {@code a}
 351      * be the first byte read and
 352      * {@code b}
 353      * be the second byte. The value returned is:
 354      * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
 355      * }</pre>
 356      * This method is suitable for reading the bytes
 357      * written by the {@code writeShort} method
 358      * of interface {@code DataOutput} if
 359      * the argument to {@code writeShort}
 360      * was intended to be a value in the range
 361      * {@code 0} through {@code 65535}.
 362      *
 363      * @return     the unsigned 16-bit value read.
 364      * @throws     EOFException  if this stream reaches the end before reading
 365      *               all the bytes.
 366      * @throws     IOException   if an I/O error occurs.
 367      */
 368     int readUnsignedShort() throws IOException;
 369 
 370     /**
 371      * Reads two input bytes and returns a {@code char} value.
 372      * Let {@code a}
 373      * be the first byte read and {@code b}
 374      * be the second byte. The value
 375      * returned is:
 376      * <pre>{@code (char)((a << 8) | (b & 0xff))
 377      * }</pre>
 378      * This method
 379      * is suitable for reading bytes written by
 380      * the {@code writeChar} method of interface
 381      * {@code DataOutput}.
 382      *
 383      * @return     the {@code char} value read.
 384      * @throws     EOFException  if this stream reaches the end before reading
 385      *               all the bytes.
 386      * @throws     IOException   if an I/O error occurs.
 387      */
 388     char readChar() throws IOException;
 389 
 390     /**
 391      * Reads four input bytes and returns an
 392      * {@code int} value. Let {@code a-d}
 393      * be the first through fourth bytes read. The value returned is:
 394      * <pre>{@code
 395      * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
 396      *  ((c & 0xff) <<  8) | (d & 0xff))
 397      * }</pre>
 398      * This method is suitable
 399      * for reading bytes written by the {@code writeInt}
 400      * method of interface {@code DataOutput}.
 401      *
 402      * @return     the {@code int} value read.
 403      * @throws     EOFException  if this stream reaches the end before reading
 404      *               all the bytes.
 405      * @throws     IOException   if an I/O error occurs.
 406      */
 407     int readInt() throws IOException;
 408 
 409     /**
 410      * Reads eight input bytes and returns
 411      * a {@code long} value. Let {@code a-h}
 412      * be the first through eighth bytes read.
 413      * The value returned is:
 414      * <pre>{@code
 415      * (((long)(a & 0xff) << 56) |
 416      *  ((long)(b & 0xff) << 48) |
 417      *  ((long)(c & 0xff) << 40) |
 418      *  ((long)(d & 0xff) << 32) |
 419      *  ((long)(e & 0xff) << 24) |
 420      *  ((long)(f & 0xff) << 16) |
 421      *  ((long)(g & 0xff) <<  8) |
 422      *  ((long)(h & 0xff)))
 423      * }</pre>
 424      * <p>
 425      * This method is suitable
 426      * for reading bytes written by the {@code writeLong}
 427      * method of interface {@code DataOutput}.
 428      *
 429      * @return     the {@code long} value read.
 430      * @throws     EOFException  if this stream reaches the end before reading
 431      *               all the bytes.
 432      * @throws     IOException   if an I/O error occurs.
 433      */
 434     long readLong() throws IOException;
 435 
 436     /**
 437      * Reads four input bytes and returns
 438      * a {@code float} value. It does this
 439      * by first constructing an {@code int}
 440      * value in exactly the manner
 441      * of the {@code readInt}
 442      * method, then converting this {@code int}
 443      * value to a {@code float} in
 444      * exactly the manner of the method {@code Float.intBitsToFloat}.
 445      * This method is suitable for reading
 446      * bytes written by the {@code writeFloat}
 447      * method of interface {@code DataOutput}.
 448      *
 449      * @return     the {@code float} value read.
 450      * @throws     EOFException  if this stream reaches the end before reading
 451      *               all the bytes.
 452      * @throws     IOException   if an I/O error occurs.
 453      */
 454     float readFloat() throws IOException;
 455 
 456     /**
 457      * Reads eight input bytes and returns
 458      * a {@code double} value. It does this
 459      * by first constructing a {@code long}
 460      * value in exactly the manner
 461      * of the {@code readLong}
 462      * method, then converting this {@code long}
 463      * value to a {@code double} in exactly
 464      * the manner of the method {@code Double.longBitsToDouble}.
 465      * This method is suitable for reading
 466      * bytes written by the {@code writeDouble}
 467      * method of interface {@code DataOutput}.
 468      *
 469      * @return     the {@code double} value read.
 470      * @throws     EOFException  if this stream reaches the end before reading
 471      *               all the bytes.
 472      * @throws     IOException   if an I/O error occurs.
 473      */
 474     double readDouble() throws IOException;
 475 
 476     /**
 477      * Reads the next line of text from the input stream.
 478      * It reads successive bytes, converting
 479      * each byte separately into a character,
 480      * until it encounters a line terminator or
 481      * end of
 482      * file; the characters read are then
 483      * returned as a {@code String}. Note
 484      * that because this
 485      * method processes bytes,
 486      * it does not support input of the full Unicode
 487      * character set.
 488      * <p>
 489      * If end of file is encountered
 490      * before even one byte can be read, then {@code null}
 491      * is returned. Otherwise, each byte that is
 492      * read is converted to type {@code char}
 493      * by zero-extension. If the character {@code '\n'}
 494      * is encountered, it is discarded and reading
 495      * ceases. If the character {@code '\r'}
 496      * is encountered, it is discarded and, if
 497      * the following byte converts to the
 498      * character {@code '\n'}, then that is
 499      * discarded also; reading then ceases. If
 500      * end of file is encountered before either
 501      * of the characters {@code '\n'} and
 502      * {@code '\r'} is encountered, reading
 503      * ceases. Once reading has ceased, a {@code String}
 504      * is returned that contains all the characters
 505      * read and not discarded, taken in order.
 506      * Note that every character in this string
 507      * will have a value less than {@code \u005Cu0100},
 508      * that is, {@code (char)256}.
 509      *
 510      * @return the next line of text from the input stream,
 511      *         or {@code null} if the end of file is
 512      *         encountered before a byte can be read.
 513      * @throws     IOException  if an I/O error occurs.
 514      */
 515     String readLine() throws IOException;
 516 
 517     /**
 518      * Reads in a string that has been encoded using a
 519      * <a href="#modified-utf-8">modified UTF-8</a>
 520      * format.
 521      * The general contract of {@code readUTF}
 522      * is that it reads a representation of a Unicode
 523      * character string encoded in modified
 524      * UTF-8 format; this string of characters
 525      * is then returned as a {@code String}.
 526      * <p>
 527      * First, two bytes are read and used to
 528      * construct an unsigned 16-bit integer in
 529      * exactly the manner of the {@code readUnsignedShort}
 530      * method. This integer value is called the
 531      * <i>UTF length</i> and specifies the number
 532      * of additional bytes to be read. These bytes
 533      * are then converted to characters by considering
 534      * them in groups. The length of each group
 535      * is computed from the value of the first
 536      * byte of the group. The byte following a
 537      * group, if any, is the first byte of the
 538      * next group.
 539      * <p>
 540      * If the first byte of a group
 541      * matches the bit pattern {@code 0xxxxxxx}
 542      * (where {@code x} means "may be {@code 0}
 543      * or {@code 1}"), then the group consists
 544      * of just that byte. The byte is zero-extended
 545      * to form a character.
 546      * <p>
 547      * If the first byte
 548      * of a group matches the bit pattern {@code 110xxxxx},
 549      * then the group consists of that byte {@code a}
 550      * and a second byte {@code b}. If there
 551      * is no byte {@code b} (because byte
 552      * {@code a} was the last of the bytes
 553      * to be read), or if byte {@code b} does
 554      * not match the bit pattern {@code 10xxxxxx},
 555      * then a {@code UTFDataFormatException}
 556      * is thrown. Otherwise, the group is converted
 557      * to the character:
 558      * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
 559      * }</pre>
 560      * If the first byte of a group
 561      * matches the bit pattern {@code 1110xxxx},
 562      * then the group consists of that byte {@code a}
 563      * and two more bytes {@code b} and {@code c}.
 564      * If there is no byte {@code c} (because
 565      * byte {@code a} was one of the last
 566      * two of the bytes to be read), or either
 567      * byte {@code b} or byte {@code c}
 568      * does not match the bit pattern {@code 10xxxxxx},
 569      * then a {@code UTFDataFormatException}
 570      * is thrown. Otherwise, the group is converted
 571      * to the character:
 572      * <pre>{@code
 573      * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
 574      * }</pre>
 575      * If the first byte of a group matches the
 576      * pattern {@code 1111xxxx} or the pattern
 577      * {@code 10xxxxxx}, then a {@code UTFDataFormatException}
 578      * is thrown.
 579      * <p>
 580      * If end of file is encountered
 581      * at any time during this entire process,
 582      * then an {@code EOFException} is thrown.
 583      * <p>
 584      * After every group has been converted to
 585      * a character by this process, the characters
 586      * are gathered, in the same order in which
 587      * their corresponding groups were read from
 588      * the input stream, to form a {@code String},
 589      * which is returned.
 590      * <p>
 591      * The {@code writeUTF}
 592      * method of interface {@code DataOutput}
 593      * may be used to write data that is suitable
 594      * for reading by this method.
 595      * @return     a Unicode string.
 596      * @throws     EOFException            if this stream reaches the end
 597      *               before reading all the bytes.
 598      * @throws     IOException             if an I/O error occurs.
 599      * @throws     UTFDataFormatException  if the bytes do not represent a
 600      *               valid modified UTF-8 encoding of a string.
 601      */
 602     String readUTF() throws IOException;
 603 }