1 /*
   2  * Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.io;
  27 
  28 /**
  29  * The {@code DataInput} interface provides
  30  * for reading bytes from a binary stream and
  31  * reconstructing from them data in any of
  32  * the Java primitive types. There is also
  33  * a
  34  * facility for reconstructing a {@code String}
  35  * from data in
  36  * <a href="#modified-utf-8">modified UTF-8</a>
  37  * format.
  38  * <p>
  39  * It is generally true of all the reading
  40  * routines in this interface that if end of
  41  * file is reached before the desired number
  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h4><a name="modified-utf-8">Modified UTF-8</a></h4>
  52  * <p>
  53  * Implementations of the DataInput and DataOutput interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>).
  58  * Note that in the following tables, the most significant bit appears in the
  59  * far left-hand column.
  60  * <p>
  61  * All characters in the range {@code '\u005Cu0001'} to
  62  * {@code '\u005Cu007F'} are represented by a single byte:
  63  *
  64  * <blockquote>
  65  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
  66  *          summary="Bit values and bytes">
  67  *     <tr>
  68  *       <td></td>
  69  *       <th id="bit_a">Bit Values</th>
  70  *     </tr>
  71  *     <tr>
  72  *       <th id="byte1_a">Byte 1</th>
  73  *       <td>
  74  *         <table border="1" cellspacing="0" width="100%">
  75  *           <tr>
  76  *             <td width="12%"><center>0</center>
  77  *             <td colspan="7"><center>bits 6-0</center>
  78  *           </tr>
  79  *         </table>
  80  *       </td>
  81  *     </tr>
  82  *   </table>
  83  * </blockquote>
  84  *
  85  * <p>
  86  * The null character {@code '\u005Cu0000'} and characters in the
  87  * range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  88  * represented by a pair of bytes:
  89  *
  90  * <blockquote>
  91  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
  92  *          summary="Bit values and bytes">
  93  *     <tr>
  94  *       <td></td>
  95  *       <th id="bit_b">Bit Values</th>
  96  *     </tr>
  97  *     <tr>
  98  *       <th id="byte1_b">Byte 1</th>
  99  *       <td>
 100  *         <table border="1" cellspacing="0" width="100%">
 101  *           <tr>
 102  *             <td width="12%"><center>1</center>
 103  *             <td width="13%"><center>1</center>
 104  *             <td width="12%"><center>0</center>
 105  *             <td colspan="5"><center>bits 10-6</center>
 106  *           </tr>
 107  *         </table>
 108  *       </td>
 109  *     </tr>
 110  *     <tr>
 111  *       <th id="byte2_a">Byte 2</th>
 112  *       <td>
 113  *         <table border="1" cellspacing="0" width="100%">
 114  *           <tr>
 115  *             <td width="12%"><center>1</center>
 116  *             <td width="13%"><center>0</center>
 117  *             <td colspan="6"><center>bits 5-0</center>
 118  *           </tr>
 119  *         </table>
 120  *       </td>
 121  *     </tr>
 122  *   </table>
 123  *  </blockquote>
 124  *
 125  * <br>
 126  * {@code char} values in the range {@code '\u005Cu0800'} to
 127  * {@code '\u005CuFFFF'} are represented by three bytes:
 128  *
 129  * <blockquote>
 130  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
 131  *          summary="Bit values and bytes">
 132  *     <tr>
 133  *       <td></td>
 134  *       <th id="bit_c">Bit Values</th>
 135  *     </tr>
 136  *     <tr>
 137  *       <th id="byte1_c">Byte 1</th>
 138  *       <td>
 139  *         <table border="1" cellspacing="0" width="100%">
 140  *           <tr>
 141  *             <td width="12%"><center>1</center>
 142  *             <td width="13%"><center>1</center>
 143  *             <td width="12%"><center>1</center>
 144  *             <td width="13%"><center>0</center>
 145  *             <td colspan="4"><center>bits 15-12</center>
 146  *           </tr>
 147  *         </table>
 148  *       </td>
 149  *     </tr>
 150  *     <tr>
 151  *       <th id="byte2_b">Byte 2</th>
 152  *       <td>
 153  *         <table border="1" cellspacing="0" width="100%">
 154  *           <tr>
 155  *             <td width="12%"><center>1</center>
 156  *             <td width="13%"><center>0</center>
 157  *             <td colspan="6"><center>bits 11-6</center>
 158  *           </tr>
 159  *         </table>
 160  *       </td>
 161  *     </tr>
 162  *     <tr>
 163  *       <th id="byte3">Byte 3</th>
 164  *       <td>
 165  *         <table border="1" cellspacing="0" width="100%">
 166  *           <tr>
 167  *             <td width="12%"><center>1</center>
 168  *             <td width="13%"><center>0</center>
 169  *             <td colspan="6"><center>bits 5-0</center>
 170  *           </tr>
 171  *         </table>
 172  *       </td>
 173  *     </tr>
 174  *   </table>
 175  *  </blockquote>
 176  *
 177  * <p>
 178  * The differences between this format and the
 179  * standard UTF-8 format are the following:
 180  * <ul>
 181  * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 182  *     rather than 1-byte, so that the encoded strings never have
 183  *     embedded nulls.
 184  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 185  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 186  *     are represented in the form of surrogate pairs.
 187  * </ul>
 188  * @author  Frank Yellin
 189  * @see     java.io.DataInputStream
 190  * @see     java.io.DataOutput
 191  * @since   JDK1.0
 192  */
 193 public
 194 interface DataInput {
 195     /**
 196      * Reads some bytes from an input
 197      * stream and stores them into the buffer
 198      * array {@code b}. The number of bytes
 199      * read is equal
 200      * to the length of {@code b}.
 201      * <p>
 202      * This method blocks until one of the
 203      * following conditions occurs:<p>
 204      * <ul>
 205      * <li>{@code b.length}
 206      * bytes of input data are available, in which
 207      * case a normal return is made.
 208      *
 209      * <li>End of
 210      * file is detected, in which case an {@code EOFException}
 211      * is thrown.
 212      *
 213      * <li>An I/O error occurs, in
 214      * which case an {@code IOException} other
 215      * than {@code EOFException} is thrown.
 216      * </ul>
 217      * <p>
 218      * If {@code b} is {@code null},
 219      * a {@code NullPointerException} is thrown.
 220      * If {@code b.length} is zero, then
 221      * no bytes are read. Otherwise, the first
 222      * byte read is stored into element {@code b[0]},
 223      * the next one into {@code b[1]}, and
 224      * so on.
 225      * If an exception is thrown from
 226      * this method, then it may be that some but
 227      * not all bytes of {@code b} have been
 228      * updated with data from the input stream.
 229      *
 230      * @param     b   the buffer into which the data is read.
 231      * @exception  EOFException  if this stream reaches the end before reading
 232      *               all the bytes.
 233      * @exception  IOException   if an I/O error occurs.
 234      */
 235     void readFully(byte b[]) throws IOException;
 236 
 237     /**
 238      *
 239      * Reads {@code len}
 240      * bytes from
 241      * an input stream.
 242      * <p>
 243      * This method
 244      * blocks until one of the following conditions
 245      * occurs:<p>
 246      * <ul>
 247      * <li>{@code len} bytes
 248      * of input data are available, in which case
 249      * a normal return is made.
 250      *
 251      * <li>End of file
 252      * is detected, in which case an {@code EOFException}
 253      * is thrown.
 254      *
 255      * <li>An I/O error occurs, in
 256      * which case an {@code IOException} other
 257      * than {@code EOFException} is thrown.
 258      * </ul>
 259      * <p>
 260      * If {@code b} is {@code null},
 261      * a {@code NullPointerException} is thrown.
 262      * If {@code off} is negative, or {@code len}
 263      * is negative, or {@code off+len} is
 264      * greater than the length of the array {@code b},
 265      * then an {@code IndexOutOfBoundsException}
 266      * is thrown.
 267      * If {@code len} is zero,
 268      * then no bytes are read. Otherwise, the first
 269      * byte read is stored into element {@code b[off]},
 270      * the next one into {@code b[off+1]},
 271      * and so on. The number of bytes read is,
 272      * at most, equal to {@code len}.
 273      *
 274      * @param     b   the buffer into which the data is read.
 275      * @param off  an int specifying the offset into the data.
 276      * @param len  an int specifying the number of bytes to read.
 277      * @exception  EOFException  if this stream reaches the end before reading
 278      *               all the bytes.
 279      * @exception  IOException   if an I/O error occurs.
 280      */
 281     void readFully(byte b[], int off, int len) throws IOException;
 282 
 283     /**
 284      * Makes an attempt to skip over
 285      * {@code n} bytes
 286      * of data from the input
 287      * stream, discarding the skipped bytes. However,
 288      * it may skip
 289      * over some smaller number of
 290      * bytes, possibly zero. This may result from
 291      * any of a
 292      * number of conditions; reaching
 293      * end of file before {@code n} bytes
 294      * have been skipped is
 295      * only one possibility.
 296      * This method never throws an {@code EOFException}.
 297      * The actual
 298      * number of bytes skipped is returned.
 299      *
 300      * @param      n   the number of bytes to be skipped.
 301      * @return     the number of bytes actually skipped.
 302      * @exception  IOException   if an I/O error occurs.
 303      */
 304     int skipBytes(int n) throws IOException;
 305 
 306     /**
 307      * Reads one input byte and returns
 308      * {@code true} if that byte is nonzero,
 309      * {@code false} if that byte is zero.
 310      * This method is suitable for reading
 311      * the byte written by the {@code writeBoolean}
 312      * method of interface {@code DataOutput}.
 313      *
 314      * @return     the {@code boolean} value read.
 315      * @exception  EOFException  if this stream reaches the end before reading
 316      *               all the bytes.
 317      * @exception  IOException   if an I/O error occurs.
 318      */
 319     boolean readBoolean() throws IOException;
 320 
 321     /**
 322      * Reads and returns one input byte.
 323      * The byte is treated as a signed value in
 324      * the range {@code -128} through {@code 127},
 325      * inclusive.
 326      * This method is suitable for
 327      * reading the byte written by the {@code writeByte}
 328      * method of interface {@code DataOutput}.
 329      *
 330      * @return     the 8-bit value read.
 331      * @exception  EOFException  if this stream reaches the end before reading
 332      *               all the bytes.
 333      * @exception  IOException   if an I/O error occurs.
 334      */
 335     byte readByte() throws IOException;
 336 
 337     /**
 338      * Reads one input byte, zero-extends
 339      * it to type {@code int}, and returns
 340      * the result, which is therefore in the range
 341      * {@code 0}
 342      * through {@code 255}.
 343      * This method is suitable for reading
 344      * the byte written by the {@code writeByte}
 345      * method of interface {@code DataOutput}
 346      * if the argument to {@code writeByte}
 347      * was intended to be a value in the range
 348      * {@code 0} through {@code 255}.
 349      *
 350      * @return     the unsigned 8-bit value read.
 351      * @exception  EOFException  if this stream reaches the end before reading
 352      *               all the bytes.
 353      * @exception  IOException   if an I/O error occurs.
 354      */
 355     int readUnsignedByte() throws IOException;
 356 
 357     /**
 358      * Reads two input bytes and returns
 359      * a {@code short} value. Let {@code a}
 360      * be the first byte read and {@code b}
 361      * be the second byte. The value
 362      * returned
 363      * is:
 364      * <p><pre><code>(short)((a &lt;&lt; 8) | (b &amp; 0xff))
 365      * </code></pre>
 366      * This method
 367      * is suitable for reading the bytes written
 368      * by the {@code writeShort} method of
 369      * interface {@code DataOutput}.
 370      *
 371      * @return     the 16-bit value read.
 372      * @exception  EOFException  if this stream reaches the end before reading
 373      *               all the bytes.
 374      * @exception  IOException   if an I/O error occurs.
 375      */
 376     short readShort() throws IOException;
 377 
 378     /**
 379      * Reads two input bytes and returns
 380      * an {@code int} value in the range {@code 0}
 381      * through {@code 65535}. Let {@code a}
 382      * be the first byte read and
 383      * {@code b}
 384      * be the second byte. The value returned is:
 385      * <p><pre><code>(((a &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 386      * </code></pre>
 387      * This method is suitable for reading the bytes
 388      * written by the {@code writeShort} method
 389      * of interface {@code DataOutput}  if
 390      * the argument to {@code writeShort}
 391      * was intended to be a value in the range
 392      * {@code 0} through {@code 65535}.
 393      *
 394      * @return     the unsigned 16-bit value read.
 395      * @exception  EOFException  if this stream reaches the end before reading
 396      *               all the bytes.
 397      * @exception  IOException   if an I/O error occurs.
 398      */
 399     int readUnsignedShort() throws IOException;
 400 
 401     /**
 402      * Reads two input bytes and returns a {@code char} value.
 403      * Let {@code a}
 404      * be the first byte read and {@code b}
 405      * be the second byte. The value
 406      * returned is:
 407      * <p><pre><code>(char)((a &lt;&lt; 8) | (b &amp; 0xff))
 408      * </code></pre>
 409      * This method
 410      * is suitable for reading bytes written by
 411      * the {@code writeChar} method of interface
 412      * {@code DataOutput}.
 413      *
 414      * @return     the {@code char} value read.
 415      * @exception  EOFException  if this stream reaches the end before reading
 416      *               all the bytes.
 417      * @exception  IOException   if an I/O error occurs.
 418      */
 419     char readChar() throws IOException;
 420 
 421     /**
 422      * Reads four input bytes and returns an
 423      * {@code int} value. Let {@code a-d}
 424      * be the first through fourth bytes read. The value returned is:
 425      * <p><pre><code>
 426      * (((a &amp; 0xff) &lt;&lt; 24) | ((b &amp; 0xff) &lt;&lt; 16) |
 427      * &#32;((c &amp; 0xff) &lt;&lt; 8) | (d &amp; 0xff))
 428      * </code></pre>
 429      * This method is suitable
 430      * for reading bytes written by the {@code writeInt}
 431      * method of interface {@code DataOutput}.
 432      *
 433      * @return     the {@code int} value read.
 434      * @exception  EOFException  if this stream reaches the end before reading
 435      *               all the bytes.
 436      * @exception  IOException   if an I/O error occurs.
 437      */
 438     int readInt() throws IOException;
 439 
 440     /**
 441      * Reads eight input bytes and returns
 442      * a {@code long} value. Let {@code a-h}
 443      * be the first through eighth bytes read.
 444      * The value returned is:
 445      * <p><pre><code>
 446      * (((long)(a &amp; 0xff) &lt;&lt; 56) |
 447      *  ((long)(b &amp; 0xff) &lt;&lt; 48) |
 448      *  ((long)(c &amp; 0xff) &lt;&lt; 40) |
 449      *  ((long)(d &amp; 0xff) &lt;&lt; 32) |
 450      *  ((long)(e &amp; 0xff) &lt;&lt; 24) |
 451      *  ((long)(f &amp; 0xff) &lt;&lt; 16) |
 452      *  ((long)(g &amp; 0xff) &lt;&lt;  8) |
 453      *  ((long)(h &amp; 0xff)))
 454      * </code></pre>
 455      * <p>
 456      * This method is suitable
 457      * for reading bytes written by the {@code writeLong}
 458      * method of interface {@code DataOutput}.
 459      *
 460      * @return     the {@code long} value read.
 461      * @exception  EOFException  if this stream reaches the end before reading
 462      *               all the bytes.
 463      * @exception  IOException   if an I/O error occurs.
 464      */
 465     long readLong() throws IOException;
 466 
 467     /**
 468      * Reads four input bytes and returns
 469      * a {@code float} value. It does this
 470      * by first constructing an {@code int}
 471      * value in exactly the manner
 472      * of the {@code readInt}
 473      * method, then converting this {@code int}
 474      * value to a {@code float} in
 475      * exactly the manner of the method {@code Float.intBitsToFloat}.
 476      * This method is suitable for reading
 477      * bytes written by the {@code writeFloat}
 478      * method of interface {@code DataOutput}.
 479      *
 480      * @return     the {@code float} value read.
 481      * @exception  EOFException  if this stream reaches the end before reading
 482      *               all the bytes.
 483      * @exception  IOException   if an I/O error occurs.
 484      */
 485     float readFloat() throws IOException;
 486 
 487     /**
 488      * Reads eight input bytes and returns
 489      * a {@code double} value. It does this
 490      * by first constructing a {@code long}
 491      * value in exactly the manner
 492      * of the {@code readlong}
 493      * method, then converting this {@code long}
 494      * value to a {@code double} in exactly
 495      * the manner of the method {@code Double.longBitsToDouble}.
 496      * This method is suitable for reading
 497      * bytes written by the {@code writeDouble}
 498      * method of interface {@code DataOutput}.
 499      *
 500      * @return     the {@code double} value read.
 501      * @exception  EOFException  if this stream reaches the end before reading
 502      *               all the bytes.
 503      * @exception  IOException   if an I/O error occurs.
 504      */
 505     double readDouble() throws IOException;
 506 
 507     /**
 508      * Reads the next line of text from the input stream.
 509      * It reads successive bytes, converting
 510      * each byte separately into a character,
 511      * until it encounters a line terminator or
 512      * end of
 513      * file; the characters read are then
 514      * returned as a {@code String}. Note
 515      * that because this
 516      * method processes bytes,
 517      * it does not support input of the full Unicode
 518      * character set.
 519      * <p>
 520      * If end of file is encountered
 521      * before even one byte can be read, then {@code null}
 522      * is returned. Otherwise, each byte that is
 523      * read is converted to type {@code char}
 524      * by zero-extension. If the character {@code '\n'}
 525      * is encountered, it is discarded and reading
 526      * ceases. If the character {@code '\r'}
 527      * is encountered, it is discarded and, if
 528      * the following byte converts &#32;to the
 529      * character {@code '\n'}, then that is
 530      * discarded also; reading then ceases. If
 531      * end of file is encountered before either
 532      * of the characters {@code '\n'} and
 533      * {@code '\r'} is encountered, reading
 534      * ceases. Once reading has ceased, a {@code String}
 535      * is returned that contains all the characters
 536      * read and not discarded, taken in order.
 537      * Note that every character in this string
 538      * will have a value less than {@code \u005Cu0100},
 539      * that is, {@code (char)256}.
 540      *
 541      * @return the next line of text from the input stream,
 542      *         or {@code null} if the end of file is
 543      *         encountered before a byte can be read.
 544      * @exception  IOException  if an I/O error occurs.
 545      */
 546     String readLine() throws IOException;
 547 
 548     /**
 549      * Reads in a string that has been encoded using a
 550      * <a href="#modified-utf-8">modified UTF-8</a>
 551      * format.
 552      * The general contract of {@code readUTF}
 553      * is that it reads a representation of a Unicode
 554      * character string encoded in modified
 555      * UTF-8 format; this string of characters
 556      * is then returned as a {@code String}.
 557      * <p>
 558      * First, two bytes are read and used to
 559      * construct an unsigned 16-bit integer in
 560      * exactly the manner of the {@code readUnsignedShort}
 561      * method . This integer value is called the
 562      * <i>UTF length</i> and specifies the number
 563      * of additional bytes to be read. These bytes
 564      * are then converted to characters by considering
 565      * them in groups. The length of each group
 566      * is computed from the value of the first
 567      * byte of the group. The byte following a
 568      * group, if any, is the first byte of the
 569      * next group.
 570      * <p>
 571      * If the first byte of a group
 572      * matches the bit pattern {@code 0xxxxxxx}
 573      * (where {@code x} means "may be {@code 0}
 574      * or {@code 1}"), then the group consists
 575      * of just that byte. The byte is zero-extended
 576      * to form a character.
 577      * <p>
 578      * If the first byte
 579      * of a group matches the bit pattern {@code 110xxxxx},
 580      * then the group consists of that byte {@code a}
 581      * and a second byte {@code b}. If there
 582      * is no byte {@code b} (because byte
 583      * {@code a} was the last of the bytes
 584      * to be read), or if byte {@code b} does
 585      * not match the bit pattern {@code 10xxxxxx},
 586      * then a {@code UTFDataFormatException}
 587      * is thrown. Otherwise, the group is converted
 588      * to the character:<p>
 589      * <pre><code>(char)(((a&amp; 0x1F) &lt;&lt; 6) | (b &amp; 0x3F))
 590      * </code></pre>
 591      * If the first byte of a group
 592      * matches the bit pattern {@code 1110xxxx},
 593      * then the group consists of that byte {@code a}
 594      * and two more bytes {@code b} and {@code c}.
 595      * If there is no byte {@code c} (because
 596      * byte {@code a} was one of the last
 597      * two of the bytes to be read), or either
 598      * byte {@code b} or byte {@code c}
 599      * does not match the bit pattern {@code 10xxxxxx},
 600      * then a {@code UTFDataFormatException}
 601      * is thrown. Otherwise, the group is converted
 602      * to the character:<p>
 603      * <pre><code>
 604      * (char)(((a &amp; 0x0F) &lt;&lt; 12) | ((b &amp; 0x3F) &lt;&lt; 6) | (c &amp; 0x3F))
 605      * </code></pre>
 606      * If the first byte of a group matches the
 607      * pattern {@code 1111xxxx} or the pattern
 608      * {@code 10xxxxxx}, then a {@code UTFDataFormatException}
 609      * is thrown.
 610      * <p>
 611      * If end of file is encountered
 612      * at any time during this entire process,
 613      * then an {@code EOFException} is thrown.
 614      * <p>
 615      * After every group has been converted to
 616      * a character by this process, the characters
 617      * are gathered, in the same order in which
 618      * their corresponding groups were read from
 619      * the input stream, to form a {@code String},
 620      * which is returned.
 621      * <p>
 622      * The {@code writeUTF}
 623      * method of interface {@code DataOutput}
 624      * may be used to write data that is suitable
 625      * for reading by this method.
 626      * @return     a Unicode string.
 627      * @exception  EOFException            if this stream reaches the end
 628      *               before reading all the bytes.
 629      * @exception  IOException             if an I/O error occurs.
 630      * @exception  UTFDataFormatException  if the bytes do not represent a
 631      *               valid modified UTF-8 encoding of a string.
 632      */
 633     String readUTF() throws IOException;
 634 }